1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
    35 // "reg_def"  name ( register save type, C convention save type,
    36 //                   ideal register type, encoding, VMReg );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
    // General registers with encodings 0-5.  Every register gets two reg_defs:
    // the base name and an _H entry whose VMReg is the base register's VMReg
    // advanced by next().
    // NOTE(review): _H presumably names the upper half of the 64-bit register -- confirm.
    // RAX, RCX and RDX are SOC in both the register and the C convention column.
    70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
    71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
    72 
    73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
    74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
    75 
    76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
    77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
    78 
    // RBX is SOC as the register save type but SOE under the C convention.
    79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
    80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
    81 
    // RSP is NS (no-save) in both columns.
    82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
    83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
    84 
    85 // Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
    // RBP is declared NS as the register save type and SOE under the C convention.
    86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
    87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
    // RSI (encoding 6) and RDI (encoding 7).  The two variants differ only in
    // the C convention save type: SOE when _WIN64 is defined, SOC otherwise.
    // The register save type (first column) is SOC in both variants.
    89 #ifdef _WIN64
    90 
    91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
    92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
    93 
    94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
    95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
    96 
    97 #else
    98 
    99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
   100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
   101 
   102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
   103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
   104 
   105 #endif
  106 
   // R8-R15 (encodings 8-15), each with its _H companion at as_VMReg()->next().
   // R8-R11 are SOC in both save-type columns.
   107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
   108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
   109 
   110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
   111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
   112 
   113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
   114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
   115 
   116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
   117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
   118 
   // R12-R15 are SOC as the register save type but SOE under the C convention.
   119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
   120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
   121 
   122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
   123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
   124 
   125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
   126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
   127 
   128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
   129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
   // R16-R31 (encodings 16-31), each with its _H companion at
   // as_VMReg()->next().  All of them are SOC in both save-type columns and
   // use ideal type Op_RegI.
   // NOTE(review): presumably the extended general registers added by Intel APX -- confirm.
   131 reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
   132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
   133 
   134 reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
   135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
   136 
   137 reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
   138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
   139 
   140 reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
   141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
   142 
   143 reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
   144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
   145 
   146 reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
   147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
   148 
   149 reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
   150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
   151 
   152 reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
   153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
   154 
   155 reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
   156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
   157 
   158 reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
   159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
   160 
   161 reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
   162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
   163 
   164 reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
   165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
   166 
   167 reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
   168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
   169 
   170 reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
   171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
   172 
   173 reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
   174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
   175 
   176 reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
   177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
  178 
  179 // Floating Point Registers
  180 
  181 // Specify priority of register selection within phases of register
  182 // allocation.  Highest priority is first.  A useful heuristic is to
  183 // give registers a low priority when they are required by machine
  184 // instructions, like EAX and EDX on I486, and choose no-save registers
  185 // before save-on-call, & save-on-call before save-on-entry.  Registers
  186 // which participate in fixed calling sequences should come last.
  187 // Registers which are used as pairs must fall on an even boundary.
  188 
   // chunk0 lists all 32 general registers defined above, each immediately
   // followed by its _H companion so every pair stays adjacent.  The list is
   // ordered by allocation priority (highest first): R10, R11, R8, R9 and R12
   // lead, R13-R31 follow RBP, and RSP comes last.  Do not reorder entries
   // without intending to change register selection.
   189 alloc_class chunk0(R10,         R10_H,
   190                    R11,         R11_H,
   191                    R8,          R8_H,
   192                    R9,          R9_H,
   193                    R12,         R12_H,
   194                    RCX,         RCX_H,
   195                    RBX,         RBX_H,
   196                    RDI,         RDI_H,
   197                    RDX,         RDX_H,
   198                    RSI,         RSI_H,
   199                    RAX,         RAX_H,
   200                    RBP,         RBP_H,
   201                    R13,         R13_H,
   202                    R14,         R14_H,
   203                    R15,         R15_H,
   204                    R16,         R16_H,
   205                    R17,         R17_H,
   206                    R18,         R18_H,
   207                    R19,         R19_H,
   208                    R20,         R20_H,
   209                    R21,         R21_H,
   210                    R22,         R22_H,
   211                    R23,         R23_H,
   212                    R24,         R24_H,
   213                    R25,         R25_H,
   214                    R26,         R26_H,
   215                    R27,         R27_H,
   216                    R28,         R28_H,
   217                    R29,         R29_H,
   218                    R30,         R30_H,
   219                    R31,         R31_H,
   220                    RSP,         RSP_H);
  221 
   222 // XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
  223 // Word a in each register holds a Float, words ab hold a Double.
  224 // The whole registers are used in SSE4.2 version intrinsics,
  225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  226 // UseXMMForArrayCopy and UseSuperword flags).
  227 // For pre EVEX enabled architectures:
  228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  229 // For EVEX enabled architectures:
  230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  231 //
  232 // Linux ABI:   No register preserved across function calls
  233 //              XMM0-XMM7 might hold parameters
  234 // Windows ABI: XMM6-XMM15 preserved across function calls
  235 //              XMM0-XMM3 might hold parameters
  236 
   // XMM0: sixteen one-word slots (XMM0 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 0; slot k
   // (k = 1..15) maps to xmm0->as_VMReg()->next(k).
   237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
   238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
   239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
   240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
   241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
   242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
   243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
   244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
   245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
   246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
   247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
   248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
   249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
   250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
   251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
   252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  253 
   // XMM1: sixteen one-word slots (XMM1 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 1; slot k
   // (k = 1..15) maps to xmm1->as_VMReg()->next(k).
   254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
   255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
   256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
   257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
   258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
   259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
   260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
   261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
   262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
   263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
   264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
   265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
   266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
   267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
   268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
   269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
  270 
   // XMM2: sixteen one-word slots (XMM2 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 2; slot k
   // (k = 1..15) maps to xmm2->as_VMReg()->next(k).
   271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
   272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
   273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
   274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
   275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
   276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
   277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
   278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
   279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
   280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
   281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
   282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
   283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
   284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
   285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
   286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
  287 
   // XMM3: sixteen one-word slots (XMM3 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 3; slot k
   // (k = 1..15) maps to xmm3->as_VMReg()->next(k).
   288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
   289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
   290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
   291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
   292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
   293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
   294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
   295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
   296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
   297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
   298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
   299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
   300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
   301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
   302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
   303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
  304 
   // XMM4: sixteen one-word slots (XMM4 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 4; slot k
   // (k = 1..15) maps to xmm4->as_VMReg()->next(k).
   305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
   306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
   307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
   308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
   309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
   310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
   311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
   312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
   313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
   314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
   315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
   316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
   317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
   318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
   319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
   320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
  321 
   // XMM5: sixteen one-word slots (XMM5 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 5; slot k
   // (k = 1..15) maps to xmm5->as_VMReg()->next(k).
   322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
   323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
   324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
   325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
   326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
   327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
   328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
   329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
   330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
   331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
   332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
   333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
   334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
   335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
   336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
   337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
  338 
   // XMM6: sixteen one-word slots (XMM6 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 6; slot k
   // (k = 1..15) maps to xmm6->as_VMReg()->next(k).
   339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
   340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
   341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
   342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
   343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
   344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
   345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
   346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
   347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
   348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
   349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
   350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
   351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
   352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
   353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
   354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
  355 
   // XMM7: sixteen one-word slots (XMM7 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 7; slot k
   // (k = 1..15) maps to xmm7->as_VMReg()->next(k).
   356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
   357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
   358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
   359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
   360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
   361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
   362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
   363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
   364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
   365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
   366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
   367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
   368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
   369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
   370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
   371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
  372 
   // XMM8: sixteen one-word slots (XMM8 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 8; slot k
   // (k = 1..15) maps to xmm8->as_VMReg()->next(k).
   373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
   374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
   375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
   376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
   377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
   378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
   379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
   380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
   381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
   382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
   383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
   384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
   385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
   386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
   387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
   388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
  389 
   // XMM9: sixteen one-word slots (XMM9 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 9; slot k
   // (k = 1..15) maps to xmm9->as_VMReg()->next(k).
   390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
   391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
   392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
   393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
   394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
   395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
   396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
   397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
   398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
   399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
   400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
   401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
   402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
   403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
   404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
   405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
  406 
   // XMM10: sixteen one-word slots (XMM10 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 10; slot k
   // (k = 1..15) maps to xmm10->as_VMReg()->next(k).
   407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
   408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
   409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
   410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
   411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
   412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
   413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
   414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
   415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
   416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
   417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
   418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
   419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
   420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
   421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
   422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
  423 
   // XMM11: sixteen one-word slots (XMM11 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 11; slot k
   // (k = 1..15) maps to xmm11->as_VMReg()->next(k).
   424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
   425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
   426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
   427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
   428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
   429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
   430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
   431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
   432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
   433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
   434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
   435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
   436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
   437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
   438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
   439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
  440 
   // XMM12: sixteen one-word slots (XMM12 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 12; slot k
   // (k = 1..15) maps to xmm12->as_VMReg()->next(k).
   441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
   442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
   443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
   444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
   445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
   446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
   447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
   448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
   449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
   450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
   451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
   452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
   453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
   454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
   455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
   456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
  457 
   // XMM13: sixteen one-word slots (XMM13 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 13; slot k
   // (k = 1..15) maps to xmm13->as_VMReg()->next(k).
   458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
   459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
   460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
   461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
   462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
   463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
   464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
   465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
   466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
   467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
   468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
   469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
   470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
   471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
   472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
   473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
  474 
   // XMM14: sixteen one-word slots (XMM14 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 14; slot k
   // (k = 1..15) maps to xmm14->as_VMReg()->next(k).
   475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
   476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
   477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
   478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
   479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
   480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
   481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
   482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
   483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
   484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
   485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
   486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
   487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
   488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
   489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
   490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
  491 
   // XMM15: sixteen one-word slots (XMM15 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 15; slot k
   // (k = 1..15) maps to xmm15->as_VMReg()->next(k).
   492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
   493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
   494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
   495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
   496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
   497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
   498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
   499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
   500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
   501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
   502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
   503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
   504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
   505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
   506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
   507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
  508 
   // XMM16: sixteen one-word slots (XMM16 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 16; slot k
   // (k = 1..15) maps to xmm16->as_VMReg()->next(k).
   509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
   510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
   511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
   512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
   513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
   514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
   515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
   516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
   517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
   518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
   519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
   520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
   521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
   522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
   523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
   524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
  525 
   // XMM17: sixteen one-word slots (XMM17 plus suffixes b-p).  Every slot is SOC
   // in both save-type columns, has ideal type Op_RegF and encoding 17; slot k
   // (k = 1..15) maps to xmm17->as_VMReg()->next(k).
   526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
   527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
   528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
   529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
   530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
   531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
   532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
   533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
   534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
   535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
   536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
   537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
   538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
   539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
   540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
   541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
  542 
  543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); // XMM18 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 18
  544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); // XMM18b..XMM18p: same encoding; VMReg advanced by next(1)..next(15)
  545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
  546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
  547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
  548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
  549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
  550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
  551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
  552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
  553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
  554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
  555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
  556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
  557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
  558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
  559 
  560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); // XMM19 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 19
  561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); // XMM19b..XMM19p: same encoding; VMReg advanced by next(1)..next(15)
  562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
  563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
  564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
  565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
  566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
  567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
  568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
  569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
  570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
  571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
  572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
  573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
  574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
  575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
  576 
  577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); // XMM20 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 20
  578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); // XMM20b..XMM20p: same encoding; VMReg advanced by next(1)..next(15)
  579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
  580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
  581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
  582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
  583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
  584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
  585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
  586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
  587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
  588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
  589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
  590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
  591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
  592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
  593 
  594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); // XMM21 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 21
  595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); // XMM21b..XMM21p: same encoding; VMReg advanced by next(1)..next(15)
  596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
  597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
  598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
  599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
  600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
  601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
  602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
  603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
  604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
  605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
  606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
  607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
  608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
  609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
  610 
  611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); // XMM22 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 22
  612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); // XMM22b..XMM22p: same encoding; VMReg advanced by next(1)..next(15)
  613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
  614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
  615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
  616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
  617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
  618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
  619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
  620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
  621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
  622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
  623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
  624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
  625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
  626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
  627 
  628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); // XMM23 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 23
  629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); // XMM23b..XMM23p: same encoding; VMReg advanced by next(1)..next(15)
  630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
  631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
  632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
  633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
  634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
  635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
  636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
  637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
  638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
  639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
  640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
  641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
  642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
  643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
  644 
  645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); // XMM24 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 24
  646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); // XMM24b..XMM24p: same encoding; VMReg advanced by next(1)..next(15)
  647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
  648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
  649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
  650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
  651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
  652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
  653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
  654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
  655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
  656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
  657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
  658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
  659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
  660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
  661 
  662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); // XMM25 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 25
  663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); // XMM25b..XMM25p: same encoding; VMReg advanced by next(1)..next(15)
  664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
  665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
  666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
  667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
  668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
  669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
  670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
  671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
  672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
  673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
  674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
  675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
  676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
  677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
  678 
  679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); // XMM26 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 26
  680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); // XMM26b..XMM26p: same encoding; VMReg advanced by next(1)..next(15)
  681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
  682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
  683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
  684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
  685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
  686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
  687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
  688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
  689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
  690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
  691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
  692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
  693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
  694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
  695 
  696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); // XMM27 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 27
  697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); // XMM27b..XMM27p: same encoding; VMReg advanced by next(1)..next(15)
  698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
  699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
  700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
  701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
  702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
  703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
  704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
  705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
  706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
  707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
  708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
  709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
  710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
  711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
  712 
  713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); // XMM28 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 28
  714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); // XMM28b..XMM28p: same encoding; VMReg advanced by next(1)..next(15)
  715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
  716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
  717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
  718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
  719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
  720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
  721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
  722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
  723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
  724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
  725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
  726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
  727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
  728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
  729 
  730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); // XMM29 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 29
  731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); // XMM29b..XMM29p: same encoding; VMReg advanced by next(1)..next(15)
  732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
  733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
  734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
  735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
  736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
  737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
  738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
  739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
  740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
  741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
  742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
  743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
  744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
  745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
  746 
  747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); // XMM30 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 30
  748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); // XMM30b..XMM30p: same encoding; VMReg advanced by next(1)..next(15)
  749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
  750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
  751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
  752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
  753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
  754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
  755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
  756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
  757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
  758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
  759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
  760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
  761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
  762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
  763 
  764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); // XMM31 base slot: save types SOC/SOC, ideal type Op_RegF, encoding 31
  765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); // XMM31b..XMM31p: same encoding; VMReg advanced by next(1)..next(15)
  766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
  767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
  768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
  769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
  770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
  772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
  773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
  774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
  775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
  776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
  777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
  778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
  779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
  780 
  781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); // Flags register: ideal type 0, encoding 16, no backing VMReg (VMRegImpl::Bad())
  782 
  783 // AVX3 Mask Registers.
      // K1..K7 are each defined as two Op_RegI halves (Kn and Kn_H) that share
      // encoding n; Kn_H maps to the VMReg slot following Kn's (next()).
      // No K0 is defined in this block.
  784 reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
  785 reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());
  786 
  787 reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
  788 reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());
  789 
  790 reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
  791 reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());
  792 
  793 reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
  794 reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());
  795 
  796 reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
  797 reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());
  798 
  799 reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
  800 reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());
  801 
  802 reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
  803 reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
  804 
  805 
  806 //----------Architecture Description Register Classes--------------------------
  807 // Several register classes are automatically defined based upon information in
  808 // this architecture description.
  809 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  811 //
  812 
  813 // Empty register class: declared with no member registers.
  814 reg_class no_reg();
  815 
  816 // Class for all pointer/long registers including APX extended GPRs.
      // Every register is listed as a (Rn, Rn_H) pair: RAX..R15 (RSP and R15
      // included) followed by the extended R16..R31.
  817 reg_class all_reg(RAX, RAX_H,
  818                   RDX, RDX_H,
  819                   RBP, RBP_H,
  820                   RDI, RDI_H,
  821                   RSI, RSI_H,
  822                   RCX, RCX_H,
  823                   RBX, RBX_H,
  824                   RSP, RSP_H,
  825                   R8,  R8_H,
  826                   R9,  R9_H,
  827                   R10, R10_H,
  828                   R11, R11_H,
  829                   R12, R12_H,
  830                   R13, R13_H,
  831                   R14, R14_H,
  832                   R15, R15_H,
  833                   R16, R16_H,
  834                   R17, R17_H,
  835                   R18, R18_H,
  836                   R19, R19_H,
  837                   R20, R20_H,
  838                   R21, R21_H,
  839                   R22, R22_H,
  840                   R23, R23_H,
  841                   R24, R24_H,
  842                   R25, R25_H,
  843                   R26, R26_H,
  844                   R27, R27_H,
  845                   R28, R28_H,
  846                   R29, R29_H,
  847                   R30, R30_H,
  848                   R31);
  849 
  850 // Class for all int registers including APX extended GPRs.
      // Only the base name of each register is listed (no _H half). RSP and R15
      // are not members; presumably because they serve as the stack pointer and
      // TLS pointer (see ptr_rsp_reg / ptr_r15_reg) -- confirm before changing.
  851 reg_class all_int_reg(RAX,
  852                       RDX,
  853                       RBP,
  854                       RDI,
  855                       RSI,
  856                       RCX,
  857                       RBX,
  858                       R8,
  859                       R9,
  860                       R10,
  861                       R11,
  862                       R12,
  863                       R13,
  864                       R14,
  865                       R16,
  866                       R17,
  867                       R18,
  868                       R19,
  869                       R20,
  870                       R21,
  871                       R22,
  872                       R23,
  873                       R24,
  874                       R25,
  875                       R26,
  876                       R27,
  877                       R28,
  878                       R29,
  879                       R30,
  880                       R31);
  881 
  882 // Class for all pointer registers
      // NOTE: this class and the ptr_* classes below use a %{ ... %} body that
      // returns a precomputed mask instead of a literal register list. The
      // _*_mask variables are not defined in this part of the file, so the
      // exclusions named in each comment cannot be checked from here.
  883 reg_class any_reg %{
  884   return _ANY_REG_mask;
  885 %}
  886 
  887 // Class for all pointer registers (excluding RSP)
  888 reg_class ptr_reg %{
  889   return _PTR_REG_mask;
  890 %}
  891 
  892 // Class for all pointer registers (excluding RSP and RBP)
  893 reg_class ptr_reg_no_rbp %{
  894   return _PTR_REG_NO_RBP_mask;
  895 %}
  896 
  897 // Class for all pointer registers (excluding RAX and RSP)
  898 reg_class ptr_no_rax_reg %{
  899   return _PTR_NO_RAX_REG_mask;
  900 %}
  901 
  902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
  903 reg_class ptr_no_rax_rbx_reg %{
  904   return _PTR_NO_RAX_RBX_REG_mask;
  905 %}
  906 
  907 // Class for all long registers (excluding RSP)
      // NOTE: the long_* classes below each return a precomputed mask
      // (_LONG_*_mask) that is not defined in this part of the file.
  908 reg_class long_reg %{
  909   return _LONG_REG_mask;
  910 %}
  911 
  912 // Class for all long registers (excluding RAX, RDX and RSP)
  913 reg_class long_no_rax_rdx_reg %{
  914   return _LONG_NO_RAX_RDX_REG_mask;
  915 %}
  916 
  917 // Class for all long registers (excluding RCX and RSP)
  918 reg_class long_no_rcx_reg %{
  919   return _LONG_NO_RCX_REG_mask;
  920 %}
  921 
  922 // Class for all long registers (excluding RBP and R13)
  923 reg_class long_no_rbp_r13_reg %{
  924   return _LONG_NO_RBP_R13_REG_mask;
  925 %}
  926 
  927 // Class for all int registers (excluding RSP)
      // NOTE: the int_* classes below each return a precomputed mask
      // (_INT_*_mask) that is not defined in this part of the file.
  928 reg_class int_reg %{
  929   return _INT_REG_mask;
  930 %}
  931 
  932 // Class for all int registers (excluding RAX, RDX, and RSP)
  933 reg_class int_no_rax_rdx_reg %{
  934   return _INT_NO_RAX_RDX_REG_mask;
  935 %}
  936 
  937 // Class for all int registers (excluding RCX and RSP)
  938 reg_class int_no_rcx_reg %{
  939   return _INT_NO_RCX_REG_mask;
  940 %}
  941 
  942 // Class for all int registers (excluding RBP and R13)
  943 reg_class int_no_rbp_r13_reg %{
  944   return _INT_NO_RBP_R13_REG_mask;
  945 %}
  946 
  947 // Singleton class for RAX pointer register
      // (each pointer singleton below lists both halves of one register: Rn, Rn_H)
  948 reg_class ptr_rax_reg(RAX, RAX_H);
  949 
  950 // Singleton class for RBX pointer register
  951 reg_class ptr_rbx_reg(RBX, RBX_H);
  952 
  953 // Singleton class for RSI pointer register
  954 reg_class ptr_rsi_reg(RSI, RSI_H);
  955 
  956 // Singleton class for RBP pointer register
  957 reg_class ptr_rbp_reg(RBP, RBP_H);
  958 
  959 // Singleton class for RDI pointer register
  960 reg_class ptr_rdi_reg(RDI, RDI_H);
  961 
  962 // Singleton class for stack pointer
  963 reg_class ptr_rsp_reg(RSP, RSP_H);
  964 
  965 // Singleton class for TLS pointer
  966 reg_class ptr_r15_reg(R15, R15_H);
  967 
  968 // Singleton class for RAX long register
      // (each long singleton below lists both halves of one register: Rn, Rn_H)
  969 reg_class long_rax_reg(RAX, RAX_H);
  970 
  971 // Singleton class for RCX long register
  972 reg_class long_rcx_reg(RCX, RCX_H);
  973 
  974 // Singleton class for RDX long register
  975 reg_class long_rdx_reg(RDX, RDX_H);
  976 
  977 // Singleton class for R11 long register
  978 reg_class long_r11_reg(R11, R11_H);
  979 
  980 // Singleton class for RAX int register
      // (each int singleton below lists only the base register, without its _H half)
  981 reg_class int_rax_reg(RAX);
  982 
  983 // Singleton class for RBX int register
  984 reg_class int_rbx_reg(RBX);
  985 
  986 // Singleton class for RCX int register
  987 reg_class int_rcx_reg(RCX);
  988 
  989 // Singleton class for RDX int register
  990 reg_class int_rdx_reg(RDX);
  991 
  992 // Singleton class for RDI int register
  993 reg_class int_rdi_reg(RDI);
  994 
  995 // Singleton class for instruction pointer
  996 // reg_class ip_reg(RIP);
  997 
  998 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p, // chunk1: all 16 slots of XMM0..XMM31, in ascending register order
  999                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1000                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1001                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1002                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1003                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1004                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1005                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1006                    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1007                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1008                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1009                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1010                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1011                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1012                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1013                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1014                    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1015                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1016                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1017                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1018                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1019                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1020                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1021                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1022                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1023                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1024                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1025                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1026                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1027                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H, // chunk2: the mask registers, both halves, listed from K7 down to K1
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H, // class containing every defined mask register K1..K7 (both halves)
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H); // one single-register class per mask register: (Kn, Kn_H) for n = 1..7
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS); // contains only RFLAGS
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS); // sole member is RFLAGS
 1060 
 1061 // Class for pre evex float registers
      // Lists only the base slot of XMM0..XMM15 (no b..p slots).
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
      // Lists only the base slot of XMM0..XMM31, i.e. float_reg_legacy plus XMM16..XMM31.
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); // float_reg_evex when the predicate holds, otherwise float_reg_legacy
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); // as float_reg, but the predicate also requires supports_avx512vl()
 1115 
 1116 // Class for pre evex double registers
      // Lists the first two slots (XMMn, XMMnb) of XMM0..XMM15.
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
 1226 // Class for all 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
 1244 // Class for all 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
 1281 // Class for all 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
 1299 // Class for all 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
 1336 // Class for all 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
 1354 // Class for all 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for all 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
 1627   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
 1631   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1702   // If any floating point comparison instruction is used, unordered case always triggers jump
 1703   // for below condition, CF=1 is true when at least one input is NaN
 1704   Label done;
 1705   __ movl(dst, -1);
 1706   __ jcc(Assembler::below, done);
 1707   __ setcc(Assembler::notEqual, dst);
 1708   __ bind(done);
 1709 }
 1710 
 1711 enum FP_PREC {
 1712   fp_prec_hlf,
 1713   fp_prec_flt,
 1714   fp_prec_dbl
 1715 };
 1716 
 1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
 1718                                 XMMRegister p, XMMRegister q) {
 1719   if (pt == fp_prec_hlf) {
 1720     __ evucomish(p, q);
 1721   } else if (pt == fp_prec_flt) {
 1722     __ ucomiss(p, q);
 1723   } else {
 1724     __ ucomisd(p, q);
 1725   }
 1726 }
 1727 
 1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
 1729                          XMMRegister dst, XMMRegister src, Register scratch) {
 1730   if (pt == fp_prec_hlf) {
 1731     __ movhlf(dst, src, scratch);
 1732   } else if (pt == fp_prec_flt) {
 1733     __ movflt(dst, src);
 1734   } else {
 1735     __ movdbl(dst, src);
 1736   }
 1737 }
 1738 
 1739 // Math.min()          # Math.max()
 1740 // -----------------------------
 1741 // (v)ucomis[h/s/d]    #
 1742 // ja   -> b           # a
 1743 // jp   -> NaN         # NaN
 1744 // jb   -> a           # b
 1745 // je   -> a | b       # a & b
 1746 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1747                             XMMRegister a, XMMRegister b, Register rt,
 1748                             bool min, enum FP_PREC pt) {
 1749   Label nan, zero, below, above, done;
 1750 
 1751   emit_fp_ucom(masm, pt, a, b);
 1752 
 1753   if (dst->encoding() != (min ? b : a)->encoding()) {
 1754     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1755   } else {
 1756     __ jccb(Assembler::above, done);
 1757   }
 1758   __ jccb(Assembler::parity, nan);  // PF=1
 1759   __ jccb(Assembler::below, below); // CF=1
 1760 
 1761   // equal
 1762   // Using bitwise operations is a low cost way to compute the correct result
 1763   // for zero and non-zero inputs in this scenario except for NaN, which is
 1764   // handled separately. The mantissa and exponent are valid with either
 1765   // bitwise operation. For zero inputs, the sign bit is chosen according to
 1766   // whether a minimum or maximum value is required.
 1767   if (min) {
 1768     // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
 1769     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1770   } else {
 1771     // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
 1772     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1773   }
 1774   __ jmp(done);
 1775 
 1776   __ bind(above);
 1777   movfp(masm, pt, dst, min ? b : a, rt);
 1778   __ jmp(done);
 1779 
 1780   __ bind(nan);
 1781   if (pt == fp_prec_hlf) {
 1782     __ movl(rt, 0x00007e00); // Float16.NaN
 1783     __ evmovw(dst, rt);
 1784   } else if (pt == fp_prec_flt) {
 1785     __ movl(rt, 0x7fc00000); // Float.NaN
 1786     __ movdl(dst, rt);
 1787   } else {
 1788     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1789     __ movdq(dst, rt);
 1790   }
 1791   __ jmp(done);
 1792 
 1793   __ bind(below);
 1794   movfp(masm, pt, dst, min ? a : b, rt);
 1795 
 1796   __ bind(done);
 1797 }
 1798 
 1799 //=============================================================================
// The output register mask of MachConstantBaseNode refers to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1801 
 1802 int ConstantTable::calculate_table_base_offset() const {
 1803   return 0;  // absolute addressing, no offset
 1804 }
 1805 
 1806 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1807 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1808   ShouldNotReachHere();
 1809 }
 1810 
// Emits nothing: the node has an empty encoding (size() reports 0 bytes).
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
 1814 
 1815 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1816   return 0;
 1817 }
 1818 
#ifndef PRODUCT
// Non-product listing: prints a one-line marker, since nothing is emitted.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
 1824 
 1825 
 1826 //=============================================================================
#ifndef PRODUCT
// Non-product listing of the method prolog. Prints a textual description of
// the frame setup to st; no code is generated here. The local framesize is
// decremented step by step so that each printed offset is relative to the
// stack pointer at that point of the description.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    // With a stack bang rbp is pushed first, so its slot is taken off the
    // amount that is subtracted from rsp afterwards.
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("pushq   rbp\t# Save rbp");
    if (PreserveFramePointer) {
        st->print("\n\t");
        st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("subq    rsp, #%d\t# Create frame",framesize);
    }
  } else {
    // Without a stack bang the whole frame is created at once and rbp is
    // stored into its topmost slot.
    st->print("subq    rsp, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("addq    rbp, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    // The cookie is shown in the slot below the saved rbp.
    framesize -= wordSize;
    st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
#ifdef ASSERT
    st->print("\n\t");
    st->print("# stack alignment check");
#endif
  }
  // NOTE(review): emit() hands (C->stub_function() != nullptr) to
  // verified_entry() as its last argument. Confirm that the condition below
  // selects the same compilations for which the entry barrier is actually
  // generated; the listing may be inverted with respect to the emitted code.
  if (C->stub_function() != nullptr) {
    st->print("\n\t");
    st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
    st->print("\n\t");
    st->print("je      fast_entry\t");
    st->print("\n\t");
    st->print("call    #nmethod_entry_barrier_stub\t");
    st->print("\n\tfast_entry:");
  }
  st->cr();
}
#endif
 1886 
 1887 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1888   Compile* C = ra_->C;
 1889 
 1890   int framesize = C->output()->frame_size_in_bytes();
 1891   int bangsize = C->output()->bang_size_in_bytes();
 1892 
 1893   if (C->clinit_barrier_on_entry()) {
 1894     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1895     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1896 
 1897     Label L_skip_barrier;
 1898     Register klass = rscratch1;
 1899 
 1900     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1901     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1902 
 1903     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1904 
 1905     __ bind(L_skip_barrier);
 1906   }
 1907 
 1908   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
 1909 
 1910   C->output()->set_frame_complete(__ offset());
 1911 
 1912   if (C->has_mach_constant_base_node()) {
 1913     // NOTE: We set the table base offset here because users might be
 1914     // emitted before MachConstantBaseNode.
 1915     ConstantTable& constant_table = C->output()->constant_table();
 1916     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1917   }
 1918 }
 1919 
 1920 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1921 {
 1922   return MachNode::size(ra_); // too many variables; just compute it
 1923                               // the hard way
 1924 }
 1925 
 1926 int MachPrologNode::reloc() const
 1927 {
 1928   return 0; // a large enough number
 1929 }
 1930 
 1931 //=============================================================================
 1932 #ifndef PRODUCT
 1933 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1934 {
 1935   Compile* C = ra_->C;
 1936   if (generate_vzeroupper(C)) {
 1937     st->print("vzeroupper");
 1938     st->cr(); st->print("\t");
 1939   }
 1940 
 1941   int framesize = C->output()->frame_size_in_bytes();
 1942   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1943   // Remove word for return adr already pushed
 1944   // and RBP
 1945   framesize -= 2*wordSize;
 1946 
 1947   if (framesize) {
 1948     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1949     st->print("\t");
 1950   }
 1951 
 1952   st->print_cr("popq    rbp");
 1953   if (do_polling() && C->is_method_compilation()) {
 1954     st->print("\t");
 1955     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1956                  "ja      #safepoint_stub\t"
 1957                  "# Safepoint: poll for GC");
 1958   }
 1959 }
 1960 #endif
 1961 
 1962 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1963 {
 1964   Compile* C = ra_->C;
 1965 
 1966   if (generate_vzeroupper(C)) {
 1967     // Clear upper bits of YMM registers when current compiled code uses
 1968     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1969     __ vzeroupper();
 1970   }
 1971 
 1972   int framesize = C->output()->frame_size_in_bytes();
 1973   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1974   // Remove word for return adr already pushed
 1975   // and RBP
 1976   framesize -= 2*wordSize;
 1977 
 1978   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1979 
 1980   if (framesize) {
 1981     __ addq(rsp, framesize);
 1982   }
 1983 
 1984   __ popq(rbp);
 1985 
 1986   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1987     __ reserved_stack_check();
 1988   }
 1989 
 1990   if (do_polling() && C->is_method_compilation()) {
 1991     Label dummy_label;
 1992     Label* code_stub = &dummy_label;
 1993     if (!C->output()->in_scratch_emit_size()) {
 1994       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1995       C->output()->add_stub(stub);
 1996       code_stub = &stub->entry();
 1997     }
 1998     __ relocate(relocInfo::poll_return_type);
 1999     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 2000   }
 2001 }
 2002 
 2003 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 2004 {
 2005   return MachNode::size(ra_); // too many variables; just compute it
 2006                               // the hard way
 2007 }
 2008 
 2009 int MachEpilogNode::reloc() const
 2010 {
 2011   return 2; // a large enough number
 2012 }
 2013 
 2014 const Pipeline* MachEpilogNode::pipeline() const
 2015 {
 2016   return MachNode::pipeline_class();
 2017 }
 2018 
 2019 //=============================================================================
 2020 
 2021 enum RC {
 2022   rc_bad,
 2023   rc_int,
 2024   rc_kreg,
 2025   rc_float,
 2026   rc_stack
 2027 };
 2028 
 2029 static enum RC rc_class(OptoReg::Name reg)
 2030 {
 2031   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 2032 
 2033   if (OptoReg::is_stack(reg)) return rc_stack;
 2034 
 2035   VMReg r = OptoReg::as_VMReg(reg);
 2036 
 2037   if (r->is_Register()) return rc_int;
 2038 
 2039   if (r->is_KRegister()) return rc_kreg;
 2040 
 2041   assert(r->is_XMMRegister(), "must be");
 2042   return rc_float;
 2043 }
 2044 
 2045 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Vector register-to-register move; called in this file for spill copies
// whose source and destination are both XMM registers.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector move between a register and a stack slot; callers in this file pass
// is_load == true for stack -> register and false for register -> stack.
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
 2051 
// Copy a vector value of ideal register kind ireg from the stack slot at
// rsp + src_offset to the stack slot at rsp + dst_offset.
//
// With a non-null masm the instructions are emitted. With a null masm, in
// non-product builds, the same sequence is printed to st instead.
// Supported kinds are Op_VecS, Op_VecD, Op_VecX, Op_VecY and Op_VecZ; any
// other value hits ShouldNotReachHere().
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS:
      // 32-bit copy through rax; rax is saved below rsp and restored after.
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD:
      // 64-bit copy as a push/pop pair, no scratch register needed.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX:
      // 128-bit copy as two 64-bit push/pop pairs.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY:
      // 256-bit copy through xmm0; xmm0 is saved below rsp and restored after.
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      // 512-bit copy through xmm0; xmm0 is saved below rsp and restored after.
      // The trailing argument 2 is presumably the 512-bit vector length
      // encoding - TODO confirm.
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Listing only: mirrors the sequences above as text.
    switch (ireg) {
    case Op_VecS:
      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl    rax, [rsp + #%d]\n\t"
                "movl    [rsp + #%d], rax\n\t"
                "movq    rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset);
      break;
     case Op_VecX:
      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushq   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): the text shows vmovdqu while the emitting branch uses
      // evmovdquq for this case - confirm whether the mnemonic should match.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
 2129 
 2130 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2131                                        PhaseRegAlloc* ra_,
 2132                                        bool do_size,
 2133                                        outputStream* st) const {
 2134   assert(masm != nullptr || st  != nullptr, "sanity");
 2135   // Get registers to move
 2136   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2137   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2138   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2139   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2140 
 2141   enum RC src_second_rc = rc_class(src_second);
 2142   enum RC src_first_rc = rc_class(src_first);
 2143   enum RC dst_second_rc = rc_class(dst_second);
 2144   enum RC dst_first_rc = rc_class(dst_first);
 2145 
 2146   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2147          "must move at least 1 register" );
 2148 
 2149   if (src_first == dst_first && src_second == dst_second) {
 2150     // Self copy, no move
 2151     return 0;
 2152   }
 2153   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
 2154     uint ireg = ideal_reg();
 2155     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2156     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 2157     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 2158       // mem -> mem
 2159       int src_offset = ra_->reg2offset(src_first);
 2160       int dst_offset = ra_->reg2offset(dst_first);
 2161       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 2162     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 2163       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 2164     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 2165       int stack_offset = ra_->reg2offset(dst_first);
 2166       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 2167     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 2168       int stack_offset = ra_->reg2offset(src_first);
 2169       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2170     } else {
 2171       ShouldNotReachHere();
 2172     }
 2173     return 0;
 2174   }
 2175   if (src_first_rc == rc_stack) {
 2176     // mem ->
 2177     if (dst_first_rc == rc_stack) {
 2178       // mem -> mem
 2179       assert(src_second != dst_first, "overlap");
 2180       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2181           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2182         // 64-bit
 2183         int src_offset = ra_->reg2offset(src_first);
 2184         int dst_offset = ra_->reg2offset(dst_first);
 2185         if (masm) {
 2186           __ pushq(Address(rsp, src_offset));
 2187           __ popq (Address(rsp, dst_offset));
 2188 #ifndef PRODUCT
 2189         } else {
 2190           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2191                     "popq    [rsp + #%d]",
 2192                      src_offset, dst_offset);
 2193 #endif
 2194         }
 2195       } else {
 2196         // 32-bit
 2197         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2198         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2199         // No pushl/popl, so:
 2200         int src_offset = ra_->reg2offset(src_first);
 2201         int dst_offset = ra_->reg2offset(dst_first);
 2202         if (masm) {
 2203           __ movq(Address(rsp, -8), rax);
 2204           __ movl(rax, Address(rsp, src_offset));
 2205           __ movl(Address(rsp, dst_offset), rax);
 2206           __ movq(rax, Address(rsp, -8));
 2207 #ifndef PRODUCT
 2208         } else {
 2209           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2210                     "movl    rax, [rsp + #%d]\n\t"
 2211                     "movl    [rsp + #%d], rax\n\t"
 2212                     "movq    rax, [rsp - #8]",
 2213                      src_offset, dst_offset);
 2214 #endif
 2215         }
 2216       }
 2217       return 0;
 2218     } else if (dst_first_rc == rc_int) {
 2219       // mem -> gpr
 2220       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2221           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2222         // 64-bit
 2223         int offset = ra_->reg2offset(src_first);
 2224         if (masm) {
 2225           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2226 #ifndef PRODUCT
 2227         } else {
 2228           st->print("movq    %s, [rsp + #%d]\t# spill",
 2229                      Matcher::regName[dst_first],
 2230                      offset);
 2231 #endif
 2232         }
 2233       } else {
 2234         // 32-bit
 2235         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2236         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2237         int offset = ra_->reg2offset(src_first);
 2238         if (masm) {
 2239           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2240 #ifndef PRODUCT
 2241         } else {
 2242           st->print("movl    %s, [rsp + #%d]\t# spill",
 2243                      Matcher::regName[dst_first],
 2244                      offset);
 2245 #endif
 2246         }
 2247       }
 2248       return 0;
 2249     } else if (dst_first_rc == rc_float) {
 2250       // mem-> xmm
 2251       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2252           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2253         // 64-bit
 2254         int offset = ra_->reg2offset(src_first);
 2255         if (masm) {
 2256           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2257 #ifndef PRODUCT
 2258         } else {
 2259           st->print("%s  %s, [rsp + #%d]\t# spill",
 2260                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2261                      Matcher::regName[dst_first],
 2262                      offset);
 2263 #endif
 2264         }
 2265       } else {
 2266         // 32-bit
 2267         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2268         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2269         int offset = ra_->reg2offset(src_first);
 2270         if (masm) {
 2271           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2272 #ifndef PRODUCT
 2273         } else {
 2274           st->print("movss   %s, [rsp + #%d]\t# spill",
 2275                      Matcher::regName[dst_first],
 2276                      offset);
 2277 #endif
 2278         }
 2279       }
 2280       return 0;
 2281     } else if (dst_first_rc == rc_kreg) {
 2282       // mem -> kreg
 2283       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2284           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2285         // 64-bit
 2286         int offset = ra_->reg2offset(src_first);
 2287         if (masm) {
 2288           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2289 #ifndef PRODUCT
 2290         } else {
 2291           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2292                      Matcher::regName[dst_first],
 2293                      offset);
 2294 #endif
 2295         }
 2296       }
 2297       return 0;
 2298     }
 2299   } else if (src_first_rc == rc_int) {
 2300     // gpr ->
 2301     if (dst_first_rc == rc_stack) {
 2302       // gpr -> mem
 2303       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2304           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2305         // 64-bit
 2306         int offset = ra_->reg2offset(dst_first);
 2307         if (masm) {
 2308           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2309 #ifndef PRODUCT
 2310         } else {
 2311           st->print("movq    [rsp + #%d], %s\t# spill",
 2312                      offset,
 2313                      Matcher::regName[src_first]);
 2314 #endif
 2315         }
 2316       } else {
 2317         // 32-bit
 2318         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2319         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2320         int offset = ra_->reg2offset(dst_first);
 2321         if (masm) {
 2322           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2323 #ifndef PRODUCT
 2324         } else {
 2325           st->print("movl    [rsp + #%d], %s\t# spill",
 2326                      offset,
 2327                      Matcher::regName[src_first]);
 2328 #endif
 2329         }
 2330       }
 2331       return 0;
 2332     } else if (dst_first_rc == rc_int) {
 2333       // gpr -> gpr
 2334       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2335           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2336         // 64-bit
 2337         if (masm) {
 2338           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2339                   as_Register(Matcher::_regEncode[src_first]));
 2340 #ifndef PRODUCT
 2341         } else {
 2342           st->print("movq    %s, %s\t# spill",
 2343                      Matcher::regName[dst_first],
 2344                      Matcher::regName[src_first]);
 2345 #endif
 2346         }
 2347         return 0;
 2348       } else {
 2349         // 32-bit
 2350         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2351         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2352         if (masm) {
 2353           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2354                   as_Register(Matcher::_regEncode[src_first]));
 2355 #ifndef PRODUCT
 2356         } else {
 2357           st->print("movl    %s, %s\t# spill",
 2358                      Matcher::regName[dst_first],
 2359                      Matcher::regName[src_first]);
 2360 #endif
 2361         }
 2362         return 0;
 2363       }
 2364     } else if (dst_first_rc == rc_float) {
 2365       // gpr -> xmm
 2366       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2367           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2368         // 64-bit
 2369         if (masm) {
 2370           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2371 #ifndef PRODUCT
 2372         } else {
 2373           st->print("movdq   %s, %s\t# spill",
 2374                      Matcher::regName[dst_first],
 2375                      Matcher::regName[src_first]);
 2376 #endif
 2377         }
 2378       } else {
 2379         // 32-bit
 2380         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2381         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2382         if (masm) {
 2383           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2384 #ifndef PRODUCT
 2385         } else {
 2386           st->print("movdl   %s, %s\t# spill",
 2387                      Matcher::regName[dst_first],
 2388                      Matcher::regName[src_first]);
 2389 #endif
 2390         }
 2391       }
 2392       return 0;
 2393     } else if (dst_first_rc == rc_kreg) {
 2394       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2395           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2396         // 64-bit
 2397         if (masm) {
 2398           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2399   #ifndef PRODUCT
 2400         } else {
 2401            st->print("kmovq   %s, %s\t# spill",
 2402                        Matcher::regName[dst_first],
 2403                        Matcher::regName[src_first]);
 2404   #endif
 2405         }
 2406       }
 2407       Unimplemented();
 2408       return 0;
 2409     }
 2410   } else if (src_first_rc == rc_float) {
 2411     // xmm ->
 2412     if (dst_first_rc == rc_stack) {
 2413       // xmm -> mem
 2414       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2415           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2416         // 64-bit
 2417         int offset = ra_->reg2offset(dst_first);
 2418         if (masm) {
 2419           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2420 #ifndef PRODUCT
 2421         } else {
 2422           st->print("movsd   [rsp + #%d], %s\t# spill",
 2423                      offset,
 2424                      Matcher::regName[src_first]);
 2425 #endif
 2426         }
 2427       } else {
 2428         // 32-bit
 2429         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2430         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2431         int offset = ra_->reg2offset(dst_first);
 2432         if (masm) {
 2433           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2434 #ifndef PRODUCT
 2435         } else {
 2436           st->print("movss   [rsp + #%d], %s\t# spill",
 2437                      offset,
 2438                      Matcher::regName[src_first]);
 2439 #endif
 2440         }
 2441       }
 2442       return 0;
 2443     } else if (dst_first_rc == rc_int) {
 2444       // xmm -> gpr
 2445       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2446           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2447         // 64-bit
 2448         if (masm) {
 2449           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2450 #ifndef PRODUCT
 2451         } else {
 2452           st->print("movdq   %s, %s\t# spill",
 2453                      Matcher::regName[dst_first],
 2454                      Matcher::regName[src_first]);
 2455 #endif
 2456         }
 2457       } else {
 2458         // 32-bit
 2459         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2460         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2461         if (masm) {
 2462           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2463 #ifndef PRODUCT
 2464         } else {
 2465           st->print("movdl   %s, %s\t# spill",
 2466                      Matcher::regName[dst_first],
 2467                      Matcher::regName[src_first]);
 2468 #endif
 2469         }
 2470       }
 2471       return 0;
 2472     } else if (dst_first_rc == rc_float) {
 2473       // xmm -> xmm
 2474       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2475           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2476         // 64-bit
 2477         if (masm) {
 2478           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2479 #ifndef PRODUCT
 2480         } else {
 2481           st->print("%s  %s, %s\t# spill",
 2482                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2483                      Matcher::regName[dst_first],
 2484                      Matcher::regName[src_first]);
 2485 #endif
 2486         }
 2487       } else {
 2488         // 32-bit
 2489         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2490         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2491         if (masm) {
 2492           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2493 #ifndef PRODUCT
 2494         } else {
 2495           st->print("%s  %s, %s\t# spill",
 2496                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2497                      Matcher::regName[dst_first],
 2498                      Matcher::regName[src_first]);
 2499 #endif
 2500         }
 2501       }
 2502       return 0;
 2503     } else if (dst_first_rc == rc_kreg) {
 2504       assert(false, "Illegal spilling");
 2505       return 0;
 2506     }
 2507   } else if (src_first_rc == rc_kreg) {
 2508     if (dst_first_rc == rc_stack) {
 2509       // mem -> kreg
 2510       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2511           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2512         // 64-bit
 2513         int offset = ra_->reg2offset(dst_first);
 2514         if (masm) {
 2515           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2516 #ifndef PRODUCT
 2517         } else {
 2518           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2519                      offset,
 2520                      Matcher::regName[src_first]);
 2521 #endif
 2522         }
 2523       }
 2524       return 0;
 2525     } else if (dst_first_rc == rc_int) {
 2526       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2527           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2528         // 64-bit
 2529         if (masm) {
 2530           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2531 #ifndef PRODUCT
 2532         } else {
 2533          st->print("kmovq   %s, %s\t# spill",
 2534                      Matcher::regName[dst_first],
 2535                      Matcher::regName[src_first]);
 2536 #endif
 2537         }
 2538       }
 2539       Unimplemented();
 2540       return 0;
 2541     } else if (dst_first_rc == rc_kreg) {
 2542       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2543           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2544         // 64-bit
 2545         if (masm) {
 2546           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2547 #ifndef PRODUCT
 2548         } else {
 2549          st->print("kmovq   %s, %s\t# spill",
 2550                      Matcher::regName[dst_first],
 2551                      Matcher::regName[src_first]);
 2552 #endif
 2553         }
 2554       }
 2555       return 0;
 2556     } else if (dst_first_rc == rc_float) {
 2557       assert(false, "Illegal spill");
 2558       return 0;
 2559     }
 2560   }
 2561 
 2562   assert(0," foo ");
 2563   Unimplemented();
 2564   return 0;
 2565 }
 2566 
 2567 #ifndef PRODUCT
 2568 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2569   implementation(nullptr, ra_, false, st);
 2570 }
 2571 #endif
 2572 
 2573 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2574   implementation(masm, ra_, false, nullptr);
 2575 }
 2576 
 2577 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2578   return MachNode::size(ra_);
 2579 }
 2580 
 2581 //=============================================================================
 2582 #ifndef PRODUCT
 2583 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2584 {
 2585   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2586   int reg = ra_->get_reg_first(this);
 2587   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2588             Matcher::regName[reg], offset);
 2589 }
 2590 #endif
 2591 
 2592 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2593 {
 2594   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2595   int reg = ra_->get_encode(this);
 2596 
 2597   __ lea(as_Register(reg), Address(rsp, offset));
 2598 }
 2599 
 2600 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2601 {
 2602   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2603   if (ra_->get_encode(this) > 15) {
 2604     return (offset < 0x80) ? 6 : 9; // REX2
 2605   } else {
 2606     return (offset < 0x80) ? 5 : 8; // REX
 2607   }
 2608 }
 2609 
 2610 //=============================================================================
 2611 #ifndef PRODUCT
 2612 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2613 {
 2614   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2615   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2616   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2617 }
 2618 #endif
 2619 
 2620 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2621 {
 2622   __ ic_check(InteriorEntryAlignment);
 2623 }
 2624 
 2625 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2626 {
 2627   return MachNode::size(ra_); // too many variables; just compute it
 2628                               // the hard way
 2629 }
 2630 
 2631 
 2632 //=============================================================================
 2633 
 2634 bool Matcher::supports_vector_calling_convention(void) {
 2635   return EnableVectorSupport;
 2636 }
 2637 
 2638 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2639   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2640 }
 2641 
 2642 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2643   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2644 }
 2645 
 2646 #ifdef ASSERT
 2647 static bool is_ndd_demotable(const MachNode* mdef) {
 2648   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2649 }
 2650 #endif
 2651 
 2652 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2653                                             int oper_index) {
 2654   if (mdef == nullptr) {
 2655     return false;
 2656   }
 2657 
 2658   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2659       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2660     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2661     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2662     return false;
 2663   }
 2664 
 2665   // Complex memory operand covers multiple incoming edges needed for
 2666   // address computation. Biasing def towards any address component will not
 2667   // result in NDD demotion by assembler.
 2668   if (mdef->operand_num_edges(oper_index) != 1) {
 2669     return false;
 2670   }
 2671 
 2672   // Demotion candidate must be register mask compatible with definition.
 2673   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2674   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2675     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2676     return false;
 2677   }
 2678 
 2679   switch (oper_index) {
 2680   // First operand of MachNode corresponding to Intel APX NDD selection
 2681   // pattern can share its assigned register with definition operand if
 2682   // their live ranges do not overlap. In such a scenario we can demote
 2683   // it to legacy map0/map1 instruction by replacing its 4-byte extended
 2684   // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
 2685   // are decorated with a special flag by instruction selector.
 2686   case 1:
 2687     return is_ndd_demotable_opr1(mdef);
 2688 
 2689   // Definition operand of commutative operation can be biased towards second
 2690   // operand.
 2691   case 2:
 2692     return is_ndd_demotable_opr2(mdef);
 2693 
 2694   // Current scheme only selects up to two biasing candidates
 2695   default:
 2696     assert(false, "unhandled operand index: %s", mdef->Name());
 2697     break;
 2698   }
 2699 
 2700   return false;
 2701 }
 2702 
 2703 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2704   assert(EnableVectorSupport, "sanity");
 2705   int lo = XMM0_num;
 2706   int hi = XMM0b_num;
 2707   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2708   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2709   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2710   return OptoRegPair(hi, lo);
 2711 }
 2712 
 2713 // Is this branch offset short enough that a short branch can be used?
 2714 //
 2715 // NOTE: If the platform does not provide any short branch variants, then
 2716 //       this method should return false for offset 0.
 2717 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2718   // The passed offset is relative to address of the branch.
 2719   // On 86 a branch displacement is calculated relative to address
 2720   // of a next instruction.
 2721   offset -= br_size;
 2722 
 2723   // the short version of jmpConUCF2 contains multiple branches,
 2724   // making the reach slightly less
 2725   if (rule == jmpConUCF2_rule)
 2726     return (-126 <= offset && offset <= 125);
 2727   return (-128 <= offset && offset <= 127);
 2728 }
 2729 
 2730 #ifdef ASSERT
 2731 // Return whether or not this register is ever used as an argument.
 2732 bool Matcher::can_be_java_arg(int reg)
 2733 {
 2734   return
 2735     reg ==  RDI_num || reg == RDI_H_num ||
 2736     reg ==  RSI_num || reg == RSI_H_num ||
 2737     reg ==  RDX_num || reg == RDX_H_num ||
 2738     reg ==  RCX_num || reg == RCX_H_num ||
 2739     reg ==   R8_num || reg ==  R8_H_num ||
 2740     reg ==   R9_num || reg ==  R9_H_num ||
 2741     reg ==  R12_num || reg == R12_H_num ||
 2742     reg == XMM0_num || reg == XMM0b_num ||
 2743     reg == XMM1_num || reg == XMM1b_num ||
 2744     reg == XMM2_num || reg == XMM2b_num ||
 2745     reg == XMM3_num || reg == XMM3b_num ||
 2746     reg == XMM4_num || reg == XMM4b_num ||
 2747     reg == XMM5_num || reg == XMM5b_num ||
 2748     reg == XMM6_num || reg == XMM6b_num ||
 2749     reg == XMM7_num || reg == XMM7b_num;
 2750 }
 2751 #endif
 2752 
 2753 uint Matcher::int_pressure_limit()
 2754 {
 2755   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2756 }
 2757 
 2758 uint Matcher::float_pressure_limit()
 2759 {
 2760   // After experiment around with different values, the following default threshold
 2761   // works best for LCM's register pressure scheduling on x64.
 2762   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2763   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2764   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2765 }
 2766 
 2767 // Register for DIVI projection of divmodI
 2768 const RegMask& Matcher::divI_proj_mask() {
 2769   return INT_RAX_REG_mask();
 2770 }
 2771 
 2772 // Register for MODI projection of divmodI
 2773 const RegMask& Matcher::modI_proj_mask() {
 2774   return INT_RDX_REG_mask();
 2775 }
 2776 
 2777 // Register for DIVL projection of divmodL
 2778 const RegMask& Matcher::divL_proj_mask() {
 2779   return LONG_RAX_REG_mask();
 2780 }
 2781 
 2782 // Register for MODL projection of divmodL
 2783 const RegMask& Matcher::modL_proj_mask() {
 2784   return LONG_RDX_REG_mask();
 2785 }
 2786 
 2787 %}
 2788 
 2789 source_hpp %{
 2790 // Header information of the source block.
 2791 // Method declarations/definitions which are used outside
 2792 // the ad-scope can conveniently be defined here.
 2793 //
// To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
 2796 
 2797 #include "runtime/vm_version.hpp"
 2798 
 2799 class NativeJump;
 2800 
 2801 class CallStubImpl {
 2802 
 2803   //--------------------------------------------------------------
 2804   //---<  Used for optimization in Compile::shorten_branches  >---
 2805   //--------------------------------------------------------------
 2806 
 2807  public:
 2808   // Size of call trampoline stub.
 2809   static uint size_call_trampoline() {
 2810     return 0; // no call trampolines on this platform
 2811   }
 2812 
 2813   // number of relocations needed by a call trampoline stub
 2814   static uint reloc_call_trampoline() {
 2815     return 0; // no call trampolines on this platform
 2816   }
 2817 };
 2818 
 2819 class HandlerImpl {
 2820 
 2821  public:
 2822 
 2823   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2824 
 2825   static uint size_deopt_handler() {
 2826     // one call and one jmp.
 2827     return 7;
 2828   }
 2829 };
 2830 
 2831 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2832   switch(bytes) {
 2833     case  4: // fall-through
 2834     case  8: // fall-through
 2835     case 16: return Assembler::AVX_128bit;
 2836     case 32: return Assembler::AVX_256bit;
 2837     case 64: return Assembler::AVX_512bit;
 2838 
 2839     default: {
 2840       ShouldNotReachHere();
 2841       return Assembler::AVX_NoVec;
 2842     }
 2843   }
 2844 }
 2845 
 2846 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2847   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2848 }
 2849 
 2850 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2851   uint def_idx = use->operand_index(opnd);
 2852   Node* def = use->in(def_idx);
 2853   return vector_length_encoding(def);
 2854 }
 2855 
 2856 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2857   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2858          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2859 }
 2860 
 2861 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2862   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2863            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2864 }
 2865 
 2866 class Node::PD {
 2867 public:
 2868   enum NodeFlags : uint64_t {
 2869     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2870     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2871     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2872     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2873     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2874     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2875     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2876     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2877     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2878     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2879     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2880     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2881     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2882     _last_flag                = Flag_ndd_demotable_opr2
 2883   };
 2884 };
 2885 
 2886 %} // end source_hpp
 2887 
 2888 source %{
 2889 
 2890 #include "opto/addnode.hpp"
 2891 #include "c2_intelJccErratum_x86.hpp"
 2892 
 2893 void PhaseOutput::pd_perform_mach_node_analysis() {
 2894   if (VM_Version::has_intel_jcc_erratum()) {
 2895     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2896     _buf_sizes._code += extra_padding;
 2897   }
 2898 }
 2899 
 2900 int MachNode::pd_alignment_required() const {
 2901   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2902     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2903     return IntelJccErratum::largest_jcc_size() + 1;
 2904   } else {
 2905     return 1;
 2906   }
 2907 }
 2908 
 2909 int MachNode::compute_padding(int current_offset) const {
 2910   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2911     Compile* C = Compile::current();
 2912     PhaseOutput* output = C->output();
 2913     Block* block = output->block();
 2914     int index = output->index();
 2915     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2916   } else {
 2917     return 0;
 2918   }
 2919 }
 2920 
 2921 // Emit deopt handler code.
 2922 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2923 
 2924   // Note that the code buffer's insts_mark is always relative to insts.
 2925   // That's why we must use the macroassembler to generate a handler.
 2926   address base = __ start_a_stub(size_deopt_handler());
 2927   if (base == nullptr) {
 2928     ciEnv::current()->record_failure("CodeCache is full");
 2929     return 0;  // CodeBuffer::expand failed
 2930   }
 2931   int offset = __ offset();
 2932 
 2933   Label start;
 2934   __ bind(start);
 2935 
 2936   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2937 
 2938   int entry_offset = __ offset();
 2939 
 2940   __ jmp(start);
 2941 
 2942   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2943   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2944          "out of bounds read in post-call NOP check");
 2945   __ end_a_stub();
 2946   return entry_offset;
 2947 }
 2948 
 2949 static Assembler::Width widthForType(BasicType bt) {
 2950   if (bt == T_BYTE) {
 2951     return Assembler::B;
 2952   } else if (bt == T_SHORT) {
 2953     return Assembler::W;
 2954   } else if (bt == T_INT) {
 2955     return Assembler::D;
 2956   } else {
 2957     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2958     return Assembler::Q;
 2959   }
 2960 }
 2961 
 2962 //=============================================================================
 2963 
 2964   // Float masks come from different places depending on platform.
 2965   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2966   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2967   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2968   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2969   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2970   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2971   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2972   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2973   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2974   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2975   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2976   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2977   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2978   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2979   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2980   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2981   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2982   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 2983   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2984 
 2985 //=============================================================================
 2986 bool Matcher::match_rule_supported(int opcode) {
 2987   if (!has_match_rule(opcode)) {
 2988     return false; // no match rule present
 2989   }
 2990   switch (opcode) {
 2991     case Op_AbsVL:
 2992     case Op_StoreVectorScatter:
 2993       if (UseAVX < 3) {
 2994         return false;
 2995       }
 2996       break;
 2997     case Op_PopCountI:
 2998     case Op_PopCountL:
 2999       if (!UsePopCountInstruction) {
 3000         return false;
 3001       }
 3002       break;
 3003     case Op_PopCountVI:
 3004       if (UseAVX < 2) {
 3005         return false;
 3006       }
 3007       break;
 3008     case Op_CompressV:
 3009     case Op_ExpandV:
 3010     case Op_PopCountVL:
 3011       if (UseAVX < 2) {
 3012         return false;
 3013       }
 3014       break;
 3015     case Op_MulVI:
 3016       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3017         return false;
 3018       }
 3019       break;
 3020     case Op_MulVL:
 3021       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3022         return false;
 3023       }
 3024       break;
 3025     case Op_MulReductionVL:
 3026       if (VM_Version::supports_avx512dq() == false) {
 3027         return false;
 3028       }
 3029       break;
 3030     case Op_AbsVB:
 3031     case Op_AbsVS:
 3032     case Op_AbsVI:
 3033     case Op_AddReductionVI:
 3034     case Op_AndReductionV:
 3035     case Op_OrReductionV:
 3036     case Op_XorReductionV:
 3037       if (UseSSE < 3) { // requires at least SSSE3
 3038         return false;
 3039       }
 3040       break;
 3041     case Op_MaxHF:
 3042     case Op_MinHF:
 3043       if (!VM_Version::supports_avx512vlbw()) {
 3044         return false;
 3045       }  // fallthrough
 3046     case Op_AddHF:
 3047     case Op_DivHF:
 3048     case Op_FmaHF:
 3049     case Op_MulHF:
 3050     case Op_ReinterpretS2HF:
 3051     case Op_ReinterpretHF2S:
 3052     case Op_SubHF:
 3053     case Op_SqrtHF:
 3054       if (!VM_Version::supports_avx512_fp16()) {
 3055         return false;
 3056       }
 3057       break;
 3058     case Op_VectorLoadShuffle:
 3059     case Op_VectorRearrange:
 3060     case Op_MulReductionVI:
 3061       if (UseSSE < 4) { // requires at least SSE4
 3062         return false;
 3063       }
 3064       break;
 3065     case Op_IsInfiniteF:
 3066     case Op_IsInfiniteD:
 3067       if (!VM_Version::supports_avx512dq()) {
 3068         return false;
 3069       }
 3070       break;
 3071     case Op_SqrtVD:
 3072     case Op_SqrtVF:
 3073     case Op_VectorMaskCmp:
 3074     case Op_VectorCastB2X:
 3075     case Op_VectorCastS2X:
 3076     case Op_VectorCastI2X:
 3077     case Op_VectorCastL2X:
 3078     case Op_VectorCastF2X:
 3079     case Op_VectorCastD2X:
 3080     case Op_VectorUCastB2X:
 3081     case Op_VectorUCastS2X:
 3082     case Op_VectorUCastI2X:
 3083     case Op_VectorMaskCast:
 3084       if (UseAVX < 1) { // enabled for AVX only
 3085         return false;
 3086       }
 3087       break;
 3088     case Op_PopulateIndex:
 3089       if (UseAVX < 2) {
 3090         return false;
 3091       }
 3092       break;
 3093     case Op_RoundVF:
 3094       if (UseAVX < 2) { // enabled for AVX2 only
 3095         return false;
 3096       }
 3097       break;
 3098     case Op_RoundVD:
 3099       if (UseAVX < 3) {
 3100         return false;  // enabled for AVX3 only
 3101       }
 3102       break;
 3103     case Op_CompareAndSwapL:
 3104     case Op_CompareAndSwapP:
 3105       break;
 3106     case Op_StrIndexOf:
 3107       if (!UseSSE42Intrinsics) {
 3108         return false;
 3109       }
 3110       break;
 3111     case Op_StrIndexOfChar:
 3112       if (!UseSSE42Intrinsics) {
 3113         return false;
 3114       }
 3115       break;
 3116     case Op_OnSpinWait:
 3117       if (VM_Version::supports_on_spin_wait() == false) {
 3118         return false;
 3119       }
 3120       break;
 3121     case Op_MulVB:
 3122     case Op_LShiftVB:
 3123     case Op_RShiftVB:
 3124     case Op_URShiftVB:
 3125     case Op_VectorInsert:
 3126     case Op_VectorLoadMask:
 3127     case Op_VectorStoreMask:
 3128     case Op_VectorBlend:
 3129       if (UseSSE < 4) {
 3130         return false;
 3131       }
 3132       break;
 3133     case Op_MaxD:
 3134     case Op_MaxF:
 3135     case Op_MinD:
 3136     case Op_MinF:
 3137       if (UseAVX < 1) { // enabled for AVX only
 3138         return false;
 3139       }
 3140       break;
 3141     case Op_CacheWB:
 3142     case Op_CacheWBPreSync:
 3143     case Op_CacheWBPostSync:
 3144       if (!VM_Version::supports_data_cache_line_flush()) {
 3145         return false;
 3146       }
 3147       break;
 3148     case Op_ExtractB:
 3149     case Op_ExtractL:
 3150     case Op_ExtractI:
 3151     case Op_RoundDoubleMode:
 3152       if (UseSSE < 4) {
 3153         return false;
 3154       }
 3155       break;
 3156     case Op_RoundDoubleModeV:
 3157       if (VM_Version::supports_avx() == false) {
 3158         return false; // 128bit vroundpd is not available
 3159       }
 3160       break;
 3161     case Op_LoadVectorGather:
 3162     case Op_LoadVectorGatherMasked:
 3163       if (UseAVX < 2) {
 3164         return false;
 3165       }
 3166       break;
 3167     case Op_FmaF:
 3168     case Op_FmaD:
 3169     case Op_FmaVD:
 3170     case Op_FmaVF:
 3171       if (!UseFMA) {
 3172         return false;
 3173       }
 3174       break;
 3175     case Op_MacroLogicV:
 3176       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3177         return false;
 3178       }
 3179       break;
 3180 
 3181     case Op_VectorCmpMasked:
 3182     case Op_VectorMaskGen:
 3183       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3184         return false;
 3185       }
 3186       break;
 3187     case Op_VectorMaskFirstTrue:
 3188     case Op_VectorMaskLastTrue:
 3189     case Op_VectorMaskTrueCount:
 3190     case Op_VectorMaskToLong:
 3191       if (UseAVX < 1) {
 3192          return false;
 3193       }
 3194       break;
 3195     case Op_RoundF:
 3196     case Op_RoundD:
 3197       break;
 3198     case Op_CopySignD:
 3199     case Op_CopySignF:
 3200       if (UseAVX < 3)  {
 3201         return false;
 3202       }
 3203       if (!VM_Version::supports_avx512vl()) {
 3204         return false;
 3205       }
 3206       break;
 3207     case Op_CompressBits:
 3208     case Op_ExpandBits:
 3209       if (!VM_Version::supports_bmi2()) {
 3210         return false;
 3211       }
 3212       break;
 3213     case Op_CompressM:
 3214       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3215         return false;
 3216       }
 3217       break;
 3218     case Op_ConvF2HF:
 3219     case Op_ConvHF2F:
 3220       if (!VM_Version::supports_float16()) {
 3221         return false;
 3222       }
 3223       break;
 3224     case Op_VectorCastF2HF:
 3225     case Op_VectorCastHF2F:
 3226       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3227         return false;
 3228       }
 3229       break;
 3230   }
 3231   return true;  // Match rules are supported by default.
 3232 }
 3233 
 3234 //------------------------------------------------------------------------
 3235 
 3236 static inline bool is_pop_count_instr_target(BasicType bt) {
 3237   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3238          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3239 }
 3240 
 3241 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3242   return match_rule_supported_vector(opcode, vlen, bt);
 3243 }
 3244 
 3245 // Identify extra cases that we might want to provide match rules for vector nodes and
 3246 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3247 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3248   if (!match_rule_supported(opcode)) {
 3249     return false;
 3250   }
 3251   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3252   //   * SSE2 supports 128bit vectors for all types;
 3253   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3254   //   * AVX2 supports 256bit vectors for all types;
 3255   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3256   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3257   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3258   // And MaxVectorSize is taken into account as well.
 3259   if (!vector_size_supported(bt, vlen)) {
 3260     return false;
 3261   }
 3262   // Special cases which require vector length follow:
 3263   //   * implementation limitations
 3264   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3265   //   * 128bit vroundpd instruction is present only in AVX1
 3266   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3267   switch (opcode) {
 3268     case Op_MaxVHF:
 3269     case Op_MinVHF:
 3270       if (!VM_Version::supports_avx512bw()) {
 3271         return false;
 3272       }
 3273     case Op_AddVHF:
 3274     case Op_DivVHF:
 3275     case Op_FmaVHF:
 3276     case Op_MulVHF:
 3277     case Op_SubVHF:
 3278     case Op_SqrtVHF:
 3279       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3280         return false;
 3281       }
 3282       if (!VM_Version::supports_avx512_fp16()) {
 3283         return false;
 3284       }
 3285       break;
 3286     case Op_AbsVF:
 3287     case Op_NegVF:
 3288       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3289         return false; // 512bit vandps and vxorps are not available
 3290       }
 3291       break;
 3292     case Op_AbsVD:
 3293     case Op_NegVD:
 3294       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3295         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3296       }
 3297       break;
 3298     case Op_RotateRightV:
 3299     case Op_RotateLeftV:
 3300       if (bt != T_INT && bt != T_LONG) {
 3301         return false;
 3302       } // fallthrough
 3303     case Op_MacroLogicV:
 3304       if (!VM_Version::supports_evex() ||
 3305           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3306         return false;
 3307       }
 3308       break;
 3309     case Op_ClearArray:
 3310     case Op_VectorMaskGen:
 3311     case Op_VectorCmpMasked:
 3312       if (!VM_Version::supports_avx512bw()) {
 3313         return false;
 3314       }
 3315       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3316         return false;
 3317       }
 3318       break;
 3319     case Op_LoadVectorMasked:
 3320     case Op_StoreVectorMasked:
 3321       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3322         return false;
 3323       }
 3324       break;
 3325     case Op_UMinV:
 3326     case Op_UMaxV:
 3327       if (UseAVX == 0) {
 3328         return false;
 3329       }
 3330       break;
 3331     case Op_UMinReductionV:
 3332     case Op_UMaxReductionV:
 3333       if (UseAVX == 0) {
 3334         return false;
 3335       }
 3336       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3337         return false;
 3338       }
 3339       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3340         return false;
 3341       }
 3342       break;
 3343     case Op_MaxV:
 3344     case Op_MinV:
 3345       if (UseSSE < 4 && is_integral_type(bt)) {
 3346         return false;
 3347       }
 3348       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3349           // Float/Double intrinsics are enabled for AVX family currently.
 3350           if (UseAVX == 0) {
 3351             return false;
 3352           }
 3353           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3354             return false;
 3355           }
 3356       }
 3357       break;
 3358     case Op_CallLeafVector:
 3359       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3360         return false;
 3361       }
 3362       break;
 3363     case Op_AddReductionVI:
 3364       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3365         return false;
 3366       }
 3367       // fallthrough
 3368     case Op_AndReductionV:
 3369     case Op_OrReductionV:
 3370     case Op_XorReductionV:
 3371       if (is_subword_type(bt) && (UseSSE < 4)) {
 3372         return false;
 3373       }
 3374       break;
 3375     case Op_MinReductionV:
 3376     case Op_MaxReductionV:
 3377       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3378         return false;
 3379       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3380         return false;
 3381       }
 3382       // Float/Double intrinsics enabled for AVX family.
 3383       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3384         return false;
 3385       }
 3386       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3387         return false;
 3388       }
 3389       break;
 3390     case Op_VectorBlend:
 3391       if (UseAVX == 0 && size_in_bits < 128) {
 3392         return false;
 3393       }
 3394       break;
 3395     case Op_VectorTest:
 3396       if (UseSSE < 4) {
 3397         return false; // Implementation limitation
 3398       } else if (size_in_bits < 32) {
 3399         return false; // Implementation limitation
 3400       }
 3401       break;
 3402     case Op_VectorLoadShuffle:
 3403     case Op_VectorRearrange:
 3404       if(vlen == 2) {
 3405         return false; // Implementation limitation due to how shuffle is loaded
 3406       } else if (size_in_bits == 256 && UseAVX < 2) {
 3407         return false; // Implementation limitation
 3408       }
 3409       break;
 3410     case Op_VectorLoadMask:
 3411     case Op_VectorMaskCast:
 3412       if (size_in_bits == 256 && UseAVX < 2) {
 3413         return false; // Implementation limitation
 3414       }
 3415       // fallthrough
 3416     case Op_VectorStoreMask:
 3417       if (vlen == 2) {
 3418         return false; // Implementation limitation
 3419       }
 3420       break;
 3421     case Op_PopulateIndex:
 3422       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3423         return false;
 3424       }
 3425       break;
 3426     case Op_VectorCastB2X:
 3427     case Op_VectorCastS2X:
 3428     case Op_VectorCastI2X:
 3429       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3430         return false;
 3431       }
 3432       break;
 3433     case Op_VectorCastL2X:
 3434       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3435         return false;
 3436       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3437         return false;
 3438       }
 3439       break;
 3440     case Op_VectorCastF2X: {
 3441         // As per JLS section 5.1.3 narrowing conversion to sub-word types
 3442         // happen after intermediate conversion to integer and special handling
 3443         // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
 3444         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3445         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3446           return false;
 3447         }
 3448       }
 3449       // fallthrough
 3450     case Op_VectorCastD2X:
 3451       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3452         return false;
 3453       }
 3454       break;
 3455     case Op_VectorCastF2HF:
 3456     case Op_VectorCastHF2F:
 3457       if (!VM_Version::supports_f16c() &&
 3458          ((!VM_Version::supports_evex() ||
 3459          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3460         return false;
 3461       }
 3462       break;
 3463     case Op_RoundVD:
 3464       if (!VM_Version::supports_avx512dq()) {
 3465         return false;
 3466       }
 3467       break;
 3468     case Op_MulReductionVI:
 3469       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3470         return false;
 3471       }
 3472       break;
 3473     case Op_LoadVectorGatherMasked:
 3474       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3475         return false;
 3476       }
 3477       if (is_subword_type(bt) &&
 3478          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3479           (size_in_bits < 64)                                      ||
 3480           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3481         return false;
 3482       }
 3483       break;
 3484     case Op_StoreVectorScatterMasked:
 3485     case Op_StoreVectorScatter:
 3486       if (is_subword_type(bt)) {
 3487         return false;
 3488       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3489         return false;
 3490       }
 3491       // fallthrough
 3492     case Op_LoadVectorGather:
 3493       if (!is_subword_type(bt) && size_in_bits == 64) {
 3494         return false;
 3495       }
 3496       if (is_subword_type(bt) && size_in_bits < 64) {
 3497         return false;
 3498       }
 3499       break;
 3500     case Op_SaturatingAddV:
 3501     case Op_SaturatingSubV:
 3502       if (UseAVX < 1) {
 3503         return false; // Implementation limitation
 3504       }
 3505       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3506         return false;
 3507       }
 3508       break;
 3509     case Op_SelectFromTwoVector:
 3510        if (size_in_bits < 128) {
 3511          return false;
 3512        }
 3513        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3514          return false;
 3515        }
 3516        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3517          return false;
 3518        }
 3519        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3520          return false;
 3521        }
 3522        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3523          return false;
 3524        }
 3525        break;
 3526     case Op_MaskAll:
 3527       if (!VM_Version::supports_evex()) {
 3528         return false;
 3529       }
 3530       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3531         return false;
 3532       }
 3533       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3534         return false;
 3535       }
 3536       break;
 3537     case Op_VectorMaskCmp:
 3538       if (vlen < 2 || size_in_bits < 32) {
 3539         return false;
 3540       }
 3541       break;
 3542     case Op_CompressM:
 3543       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3544         return false;
 3545       }
 3546       break;
 3547     case Op_CompressV:
 3548     case Op_ExpandV:
 3549       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3550         return false;
 3551       }
 3552       if (size_in_bits < 128 ) {
 3553         return false;
 3554       }
 3555     case Op_VectorLongToMask:
 3556       if (UseAVX < 1) {
 3557         return false;
 3558       }
 3559       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3560         return false;
 3561       }
 3562       break;
 3563     case Op_SignumVD:
 3564     case Op_SignumVF:
 3565       if (UseAVX < 1) {
 3566         return false;
 3567       }
 3568       break;
 3569     case Op_PopCountVI:
 3570     case Op_PopCountVL: {
 3571         if (!is_pop_count_instr_target(bt) &&
 3572             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3573           return false;
 3574         }
 3575       }
 3576       break;
 3577     case Op_ReverseV:
 3578     case Op_ReverseBytesV:
 3579       if (UseAVX < 2) {
 3580         return false;
 3581       }
 3582       break;
 3583     case Op_CountTrailingZerosV:
 3584     case Op_CountLeadingZerosV:
 3585       if (UseAVX < 2) {
 3586         return false;
 3587       }
 3588       break;
 3589   }
 3590   return true;  // Per default match rules are supported.
 3591 }
 3592 
 3593 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 3594   // ADLC based match_rule_supported routine checks for the existence of pattern based
 3595   // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
 3596   // of their non-masked counterpart with mask edge being the differentiator.
 3597   // This routine does a strict check on the existence of masked operation patterns
 3598   // by returning a default false value for all the other opcodes apart from the
 3599   // ones whose masked instruction patterns are defined in this file.
 3600   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3601     return false;
 3602   }
 3603 
 3604   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3605   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3606     return false;
 3607   }
 3608   switch(opcode) {
 3609     // Unary masked operations
 3610     case Op_AbsVB:
 3611     case Op_AbsVS:
 3612       if(!VM_Version::supports_avx512bw()) {
 3613         return false;  // Implementation limitation
 3614       }
 3615     case Op_AbsVI:
 3616     case Op_AbsVL:
 3617       return true;
 3618 
 3619     // Ternary masked operations
 3620     case Op_FmaVF:
 3621     case Op_FmaVD:
 3622       return true;
 3623 
 3624     case Op_MacroLogicV:
 3625       if(bt != T_INT && bt != T_LONG) {
 3626         return false;
 3627       }
 3628       return true;
 3629 
 3630     // Binary masked operations
 3631     case Op_AddVB:
 3632     case Op_AddVS:
 3633     case Op_SubVB:
 3634     case Op_SubVS:
 3635     case Op_MulVS:
 3636     case Op_LShiftVS:
 3637     case Op_RShiftVS:
 3638     case Op_URShiftVS:
 3639       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3640       if (!VM_Version::supports_avx512bw()) {
 3641         return false;  // Implementation limitation
 3642       }
 3643       return true;
 3644 
 3645     case Op_MulVL:
 3646       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3647       if (!VM_Version::supports_avx512dq()) {
 3648         return false;  // Implementation limitation
 3649       }
 3650       return true;
 3651 
 3652     case Op_AndV:
 3653     case Op_OrV:
 3654     case Op_XorV:
 3655     case Op_RotateRightV:
 3656     case Op_RotateLeftV:
 3657       if (bt != T_INT && bt != T_LONG) {
 3658         return false; // Implementation limitation
 3659       }
 3660       return true;
 3661 
 3662     case Op_VectorLoadMask:
 3663       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3664       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3665         return false;
 3666       }
 3667       return true;
 3668 
 3669     case Op_AddVI:
 3670     case Op_AddVL:
 3671     case Op_AddVF:
 3672     case Op_AddVD:
 3673     case Op_SubVI:
 3674     case Op_SubVL:
 3675     case Op_SubVF:
 3676     case Op_SubVD:
 3677     case Op_MulVI:
 3678     case Op_MulVF:
 3679     case Op_MulVD:
 3680     case Op_DivVF:
 3681     case Op_DivVD:
 3682     case Op_SqrtVF:
 3683     case Op_SqrtVD:
 3684     case Op_LShiftVI:
 3685     case Op_LShiftVL:
 3686     case Op_RShiftVI:
 3687     case Op_RShiftVL:
 3688     case Op_URShiftVI:
 3689     case Op_URShiftVL:
 3690     case Op_LoadVectorMasked:
 3691     case Op_StoreVectorMasked:
 3692     case Op_LoadVectorGatherMasked:
 3693     case Op_StoreVectorScatterMasked:
 3694       return true;
 3695 
 3696     case Op_UMinV:
 3697     case Op_UMaxV:
 3698       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3699         return false;
 3700       } // fallthrough
 3701     case Op_MaxV:
 3702     case Op_MinV:
 3703       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3704         return false; // Implementation limitation
 3705       }
 3706       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3707         return false; // Implementation limitation
 3708       }
 3709       return true;
 3710     case Op_SaturatingAddV:
 3711     case Op_SaturatingSubV:
 3712       if (!is_subword_type(bt)) {
 3713         return false;
 3714       }
 3715       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3716         return false; // Implementation limitation
 3717       }
 3718       return true;
 3719 
 3720     case Op_VectorMaskCmp:
 3721       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3722         return false; // Implementation limitation
 3723       }
 3724       return true;
 3725 
 3726     case Op_VectorRearrange:
 3727       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3728         return false; // Implementation limitation
 3729       }
 3730       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3731         return false; // Implementation limitation
 3732       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3733         return false; // Implementation limitation
 3734       }
 3735       return true;
 3736 
 3737     // Binary Logical operations
 3738     case Op_AndVMask:
 3739     case Op_OrVMask:
 3740     case Op_XorVMask:
 3741       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3742         return false; // Implementation limitation
 3743       }
 3744       return true;
 3745 
 3746     case Op_PopCountVI:
 3747     case Op_PopCountVL:
 3748       if (!is_pop_count_instr_target(bt)) {
 3749         return false;
 3750       }
 3751       return true;
 3752 
 3753     case Op_MaskAll:
 3754       return true;
 3755 
 3756     case Op_CountLeadingZerosV:
 3757       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3758         return true;
 3759       }
 3760     default:
 3761       return false;
 3762   }
 3763 }
 3764 
 3765 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3766   return false;
 3767 }
 3768 
 3769 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3770 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3771   switch (elem_bt) {
 3772     case T_BYTE:  return false;
 3773     case T_SHORT: return !VM_Version::supports_avx512bw();
 3774     case T_INT:   return !VM_Version::supports_avx();
 3775     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3776     default:
 3777       ShouldNotReachHere();
 3778       return false;
 3779   }
 3780 }
 3781 
 3782 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3783   // Prefer predicate if the mask type is "TypePVectMask".
 3784   return vt->isa_pvectmask() != nullptr;
 3785 }
 3786 
 3787 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3788   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3789   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3790   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3791       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3792     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3793     return new legVecZOper();
 3794   }
 3795   if (legacy) {
 3796     switch (ideal_reg) {
 3797       case Op_VecS: return new legVecSOper();
 3798       case Op_VecD: return new legVecDOper();
 3799       case Op_VecX: return new legVecXOper();
 3800       case Op_VecY: return new legVecYOper();
 3801       case Op_VecZ: return new legVecZOper();
 3802     }
 3803   } else {
 3804     switch (ideal_reg) {
 3805       case Op_VecS: return new vecSOper();
 3806       case Op_VecD: return new vecDOper();
 3807       case Op_VecX: return new vecXOper();
 3808       case Op_VecY: return new vecYOper();
 3809       case Op_VecZ: return new vecZOper();
 3810     }
 3811   }
 3812   ShouldNotReachHere();
 3813   return nullptr;
 3814 }
 3815 
 3816 bool Matcher::is_reg2reg_move(MachNode* m) {
 3817   switch (m->rule()) {
 3818     case MoveVec2Leg_rule:
 3819     case MoveLeg2Vec_rule:
 3820     case MoveF2VL_rule:
 3821     case MoveF2LEG_rule:
 3822     case MoveVL2F_rule:
 3823     case MoveLEG2F_rule:
 3824     case MoveD2VL_rule:
 3825     case MoveD2LEG_rule:
 3826     case MoveVL2D_rule:
 3827     case MoveLEG2D_rule:
 3828       return true;
 3829     default:
 3830       return false;
 3831   }
 3832 }
 3833 
 3834 bool Matcher::is_generic_vector(MachOper* opnd) {
 3835   switch (opnd->opcode()) {
 3836     case VEC:
 3837     case LEGVEC:
 3838       return true;
 3839     default:
 3840       return false;
 3841   }
 3842 }
 3843 
 3844 //------------------------------------------------------------------------
 3845 
 3846 const RegMask* Matcher::predicate_reg_mask(void) {
 3847   return &_VECTMASK_REG_mask;
 3848 }
 3849 
 3850 // Max vector size in bytes. 0 if not supported.
 3851 int Matcher::vector_width_in_bytes(BasicType bt) {
 3852   assert(is_java_primitive(bt), "only primitive type vectors");
 3853   // SSE2 supports 128bit vectors for all types.
 3854   // AVX2 supports 256bit vectors for all types.
 3855   // AVX2/EVEX supports 512bit vectors for all types.
 3856   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3857   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3858   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3859     size = (UseAVX > 2) ? 64 : 32;
 3860   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3861     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3862   // Use flag to limit vector size.
 3863   size = MIN2(size,(int)MaxVectorSize);
 3864   // Minimum 2 values in vector (or 4 for bytes).
 3865   switch (bt) {
 3866   case T_DOUBLE:
 3867   case T_LONG:
 3868     if (size < 16) return 0;
 3869     break;
 3870   case T_FLOAT:
 3871   case T_INT:
 3872     if (size < 8) return 0;
 3873     break;
 3874   case T_BOOLEAN:
 3875     if (size < 4) return 0;
 3876     break;
 3877   case T_CHAR:
 3878     if (size < 4) return 0;
 3879     break;
 3880   case T_BYTE:
 3881     if (size < 4) return 0;
 3882     break;
 3883   case T_SHORT:
 3884     if (size < 4) return 0;
 3885     break;
 3886   default:
 3887     ShouldNotReachHere();
 3888   }
 3889   return size;
 3890 }
 3891 
 3892 // Limits on vector size (number of elements) loaded into vector.
 3893 int Matcher::max_vector_size(const BasicType bt) {
 3894   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3895 }
 3896 int Matcher::min_vector_size(const BasicType bt) {
 3897   int max_size = max_vector_size(bt);
 3898   // Min size which can be loaded into vector is 4 bytes.
 3899   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3900   // Support for calling svml double64 vectors
 3901   if (bt == T_DOUBLE) {
 3902     size = 1;
 3903   }
 3904   return MIN2(size,max_size);
 3905 }
 3906 
 3907 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3908   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3909   // by default on Cascade Lake
 3910   if (VM_Version::is_default_intel_cascade_lake()) {
 3911     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3912   }
 3913   return Matcher::max_vector_size(bt);
 3914 }
 3915 
 3916 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3917   return -1;
 3918 }
 3919 
 3920 // Vector ideal reg corresponding to specified size in bytes
 3921 uint Matcher::vector_ideal_reg(int size) {
 3922   assert(MaxVectorSize >= size, "");
 3923   switch(size) {
 3924     case  4: return Op_VecS;
 3925     case  8: return Op_VecD;
 3926     case 16: return Op_VecX;
 3927     case 32: return Op_VecY;
 3928     case 64: return Op_VecZ;
 3929   }
 3930   ShouldNotReachHere();
 3931   return 0;
 3932 }
 3933 
 3934 // Check for shift by small constant as well
 3935 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3936   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3937       shift->in(2)->get_int() <= 3 &&
 3938       // Are there other uses besides address expressions?
 3939       !matcher->is_visited(shift)) {
 3940     address_visited.set(shift->_idx); // Flag as address_visited
 3941     mstack.push(shift->in(2), Matcher::Visit);
 3942     Node *conv = shift->in(1);
 3943     // Allow Matcher to match the rule which bypass
 3944     // ConvI2L operation for an array index on LP64
 3945     // if the index value is positive.
 3946     if (conv->Opcode() == Op_ConvI2L &&
 3947         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3948         // Are there other uses besides address expressions?
 3949         !matcher->is_visited(conv)) {
 3950       address_visited.set(conv->_idx); // Flag as address_visited
 3951       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3952     } else {
 3953       mstack.push(conv, Matcher::Pre_Visit);
 3954     }
 3955     return true;
 3956   }
 3957   return false;
 3958 }
 3959 
 3960 // This function identifies sub-graphs in which a 'load' node is
 3961 // input to two different nodes, and such that it can be matched
 3962 // with BMI instructions like blsi, blsr, etc.
 3963 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
 3964 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3965 // refers to the same node.
 3966 //
 3967 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3968 // This is a temporary solution until we make DAGs expressible in ADL.
 3969 template<typename ConType>
 3970 class FusedPatternMatcher {
 3971   Node* _op1_node;
 3972   Node* _mop_node;
 3973   int _con_op;
 3974 
 3975   static int match_next(Node* n, int next_op, int next_op_idx) {
 3976     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3977       return -1;
 3978     }
 3979 
 3980     if (next_op_idx == -1) { // n is commutative, try rotations
 3981       if (n->in(1)->Opcode() == next_op) {
 3982         return 1;
 3983       } else if (n->in(2)->Opcode() == next_op) {
 3984         return 2;
 3985       }
 3986     } else {
 3987       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3988       if (n->in(next_op_idx)->Opcode() == next_op) {
 3989         return next_op_idx;
 3990       }
 3991     }
 3992     return -1;
 3993   }
 3994 
 3995  public:
 3996   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3997     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3998 
 3999   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4000              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4001              typename ConType::NativeType con_value) {
 4002     if (_op1_node->Opcode() != op1) {
 4003       return false;
 4004     }
 4005     if (_mop_node->outcnt() > 2) {
 4006       return false;
 4007     }
 4008     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4009     if (op1_op2_idx == -1) {
 4010       return false;
 4011     }
 4012     // Memory operation must be the other edge
 4013     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4014 
 4015     // Check that the mop node is really what we want
 4016     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4017       Node* op2_node = _op1_node->in(op1_op2_idx);
 4018       if (op2_node->outcnt() > 1) {
 4019         return false;
 4020       }
 4021       assert(op2_node->Opcode() == op2, "Should be");
 4022       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4023       if (op2_con_idx == -1) {
 4024         return false;
 4025       }
 4026       // Memory operation must be the other edge
 4027       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4028       // Check that the memory operation is the same node
 4029       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4030         // Now check the constant
 4031         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4032         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4033           return true;
 4034         }
 4035       }
 4036     }
 4037     return false;
 4038   }
 4039 };
 4040 
 4041 static bool is_bmi_pattern(Node* n, Node* m) {
 4042   assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
 4043   if (n != nullptr && m != nullptr) {
 4044     if (m->Opcode() == Op_LoadI) {
 4045       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4046       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4047              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4048              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4049     } else if (m->Opcode() == Op_LoadL) {
 4050       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4051       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4052              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4053              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4054     }
 4055   }
 4056   return false;
 4057 }
 4058 
 4059 // Should the matcher clone input 'm' of node 'n'?
 4060 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4061   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4062   if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
 4063     mstack.push(m, Visit);
 4064     return true;
 4065   }
 4066   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4067     mstack.push(m, Visit);           // m = ShiftCntV
 4068     return true;
 4069   }
 4070   if (is_encode_and_store_pattern(n, m)) {
 4071     mstack.push(m, Visit);
 4072     return true;
 4073   }
 4074   return false;
 4075 }
 4076 
 4077 // Should the Matcher clone shifts on addressing modes, expecting them
 4078 // to be subsumed into complex addressing expressions or compute them
 4079 // into registers?
 4080 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4081   Node *off = m->in(AddPNode::Offset);
 4082   if (off->is_Con()) {
 4083     address_visited.test_set(m->_idx); // Flag as address_visited
 4084     Node *adr = m->in(AddPNode::Address);
 4085 
 4086     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4087     // AtomicAdd is not an addressing expression.
 4088     // Cheap to find it by looking for screwy base.
 4089     if (adr->is_AddP() &&
 4090         !adr->in(AddPNode::Base)->is_top() &&
 4091         !adr->in(AddPNode::Offset)->is_Con() &&
 4092         off->get_long() == (int) (off->get_long()) && // immL32
 4093         // Are there other uses besides address expressions?
 4094         !is_visited(adr)) {
 4095       address_visited.set(adr->_idx); // Flag as address_visited
 4096       Node *shift = adr->in(AddPNode::Offset);
 4097       if (!clone_shift(shift, this, mstack, address_visited)) {
 4098         mstack.push(shift, Pre_Visit);
 4099       }
 4100       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4101       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4102     } else {
 4103       mstack.push(adr, Pre_Visit);
 4104     }
 4105 
 4106     // Clone X+offset as it also folds into most addressing expressions
 4107     mstack.push(off, Visit);
 4108     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4109     return true;
 4110   } else if (clone_shift(off, this, mstack, address_visited)) {
 4111     address_visited.test_set(m->_idx); // Flag as address_visited
 4112     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4113     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4114     return true;
 4115   }
 4116   return false;
 4117 }
 4118 
 4119 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4120   switch (bt) {
 4121     case BoolTest::eq:
 4122       return Assembler::eq;
 4123     case BoolTest::ne:
 4124       return Assembler::neq;
 4125     case BoolTest::le:
 4126     case BoolTest::ule:
 4127       return Assembler::le;
 4128     case BoolTest::ge:
 4129     case BoolTest::uge:
 4130       return Assembler::nlt;
 4131     case BoolTest::lt:
 4132     case BoolTest::ult:
 4133       return Assembler::lt;
 4134     case BoolTest::gt:
 4135     case BoolTest::ugt:
 4136       return Assembler::nle;
 4137     default : ShouldNotReachHere(); return Assembler::_false;
 4138   }
 4139 }
 4140 
 4141 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4142   switch (bt) {
 4143   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4144   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4145   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4146   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4147   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4148   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4149   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4150   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4151   }
 4152 }
 4153 
 4154 // Helper methods for MachSpillCopyNode::implementation().
 4155 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4156                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4157   assert(ireg == Op_VecS || // 32bit vector
 4158          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4159           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4160          "no non-adjacent vector moves" );
 4161   if (masm) {
 4162     switch (ireg) {
 4163     case Op_VecS: // copy whole register
 4164     case Op_VecD:
 4165     case Op_VecX:
 4166       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4167         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4168       } else {
 4169         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4170      }
 4171       break;
 4172     case Op_VecY:
 4173       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4174         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4175       } else {
 4176         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4177      }
 4178       break;
 4179     case Op_VecZ:
 4180       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4181       break;
 4182     default:
 4183       ShouldNotReachHere();
 4184     }
 4185 #ifndef PRODUCT
 4186   } else {
 4187     switch (ireg) {
 4188     case Op_VecS:
 4189     case Op_VecD:
 4190     case Op_VecX:
 4191       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4192       break;
 4193     case Op_VecY:
 4194     case Op_VecZ:
 4195       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4196       break;
 4197     default:
 4198       ShouldNotReachHere();
 4199     }
 4200 #endif
 4201   }
 4202 }
 4203 
 4204 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4205                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4206   if (masm) {
 4207     if (is_load) {
 4208       switch (ireg) {
 4209       case Op_VecS:
 4210         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4211         break;
 4212       case Op_VecD:
 4213         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4214         break;
 4215       case Op_VecX:
 4216         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4217           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4218         } else {
 4219           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4220           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4221         }
 4222         break;
 4223       case Op_VecY:
 4224         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4225           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4226         } else {
 4227           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4228           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4229         }
 4230         break;
 4231       case Op_VecZ:
 4232         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4233         break;
 4234       default:
 4235         ShouldNotReachHere();
 4236       }
 4237     } else { // store
 4238       switch (ireg) {
 4239       case Op_VecS:
 4240         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4241         break;
 4242       case Op_VecD:
 4243         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4244         break;
 4245       case Op_VecX:
 4246         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4247           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4248         }
 4249         else {
 4250           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4251         }
 4252         break;
 4253       case Op_VecY:
 4254         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4255           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4256         }
 4257         else {
 4258           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4259         }
 4260         break;
 4261       case Op_VecZ:
 4262         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4263         break;
 4264       default:
 4265         ShouldNotReachHere();
 4266       }
 4267     }
 4268 #ifndef PRODUCT
 4269   } else {
 4270     if (is_load) {
 4271       switch (ireg) {
 4272       case Op_VecS:
 4273         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4274         break;
 4275       case Op_VecD:
 4276         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4277         break;
 4278        case Op_VecX:
 4279         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4280         break;
 4281       case Op_VecY:
 4282       case Op_VecZ:
 4283         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4284         break;
 4285       default:
 4286         ShouldNotReachHere();
 4287       }
 4288     } else { // store
 4289       switch (ireg) {
 4290       case Op_VecS:
 4291         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4292         break;
 4293       case Op_VecD:
 4294         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4295         break;
 4296        case Op_VecX:
 4297         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4298         break;
 4299       case Op_VecY:
 4300       case Op_VecZ:
 4301         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4302         break;
 4303       default:
 4304         ShouldNotReachHere();
 4305       }
 4306     }
 4307 #endif
 4308   }
 4309 }
 4310 
 4311 template <class T>
 4312 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4313   int size = type2aelembytes(bt) * len;
 4314   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4315   for (int i = 0; i < len; i++) {
 4316     int offset = i * type2aelembytes(bt);
 4317     switch (bt) {
 4318       case T_BYTE: val->at(i) = con; break;
 4319       case T_SHORT: {
 4320         jshort c = con;
 4321         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4322         break;
 4323       }
 4324       case T_INT: {
 4325         jint c = con;
 4326         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4327         break;
 4328       }
 4329       case T_LONG: {
 4330         jlong c = con;
 4331         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4332         break;
 4333       }
 4334       case T_FLOAT: {
 4335         jfloat c = con;
 4336         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4337         break;
 4338       }
 4339       case T_DOUBLE: {
 4340         jdouble c = con;
 4341         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4342         break;
 4343       }
 4344       default: assert(false, "%s", type2name(bt));
 4345     }
 4346   }
 4347   return val;
 4348 }
 4349 
 4350 static inline jlong high_bit_set(BasicType bt) {
 4351   switch (bt) {
 4352     case T_BYTE:  return 0x8080808080808080;
 4353     case T_SHORT: return 0x8000800080008000;
 4354     case T_INT:   return 0x8000000080000000;
 4355     case T_LONG:  return 0x8000000000000000;
 4356     default:
 4357       ShouldNotReachHere();
 4358       return 0;
 4359   }
 4360 }
 4361 
 4362 #ifndef PRODUCT
 4363   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
          // Non-product builds only: describe this padding node on st,
          // including its size in bytes (_count).
 4364     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4365   }
 4366 #endif
 4367 
 4368   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
          // Emit the nop padding; _count is the requested amount.
 4369     __ nop(_count);
 4370   }
 4371 
 4372   uint MachNopNode::size(PhaseRegAlloc*) const {
          // The node's size is exactly the requested padding amount.
 4373     return _count;
 4374   }
 4375 
 4376 #ifndef PRODUCT
 4377   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
          // Non-product builds only: print a marker line for the breakpoint node.
 4378     st->print("# breakpoint");
 4379   }
 4380 #endif
 4381 
 4382   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
          // A breakpoint node is emitted as a single int3 instruction.
 4383     __ int3();
 4384   }
 4385 
 4386   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
          // No special-case size here: defer to the generic MachNode::size().
 4387     return MachNode::size(ra_);
 4388   }
 4389 
 4390 %}
 4391 
 4392 //----------ENCODING BLOCK-----------------------------------------------------
 4393 // This block specifies the encoding classes used by the compiler to
 4394 // output byte streams.  Encoding classes are parameterized macros
 4395 // used by Machine Instruction Nodes in order to generate the bit
 4396 // encoding of the instruction.  Operands specify their base encoding
 4397 // interface with the interface keyword.  Four interfaces are
 4398 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
 4399 // COND_INTER.  REG_INTER causes an operand to generate a function
 4400 // which returns its register number when queried.  CONST_INTER causes
 4401 // an operand to generate a function which returns the value of the
 4402 // constant when queried.  MEMORY_INTER causes an operand to generate
 4403 // four functions which return the Base Register, the Index Register,
 4404 // the Scale Value, and the Offset Value of the operand when queried.
 4405 // COND_INTER causes an operand to generate six functions which return
 4406 // the encoding code (ie - encoding bits for the instruction)
 4407 // associated with each basic boolean condition for a conditional
 4408 // instruction.
 4409 //
 4410 // Instructions specify two basic values for encoding.  Again, a
 4411 // function is available to check if the constant displacement is an
 4412 // oop. They use the ins_encode keyword to specify their encoding
 4413 // classes (which must be a sequence of enc_class names, and their
 4414 // parameters, specified in the encoding block), and they use the
 4415 // opcode keyword to specify, in order, their primary, secondary, and
 4416 // tertiary opcode.  Only the opcode sections which a particular
 4417 // instruction needs for encoding need to be specified.
 4418 encode %{
 4419   enc_class cdql_enc(no_rax_rdx_RegI div)
 4420   %{
 4421     // Full implementation of Java idiv and irem; checks for
 4422     // special case as described in JVM spec., p.243 & p.271.
 4423     //
 4424     //         normal case                           special case
 4425     //
 4426     // input : rax: dividend                         min_int
 4427     //         reg: divisor                          -1
 4428     //
 4429     // output: rax: quotient  (= rax idiv reg)       min_int
 4430     //         rdx: remainder (= rax irem reg)       0
 4431     //
 4432     //  Code sequence:
 4433     //
 4434     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4435     //    5:   75 07/08                jne    e <normal>
 4436     //    7:   33 d2                   xor    %edx,%edx
 4437     //  [div >= 8 -> offset + 1]
 4438     //  [REX_B]
 4439     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4440     //    c:   74 03/04                je     11 <done>
 4441     // 000000000000000e <normal>:
 4442     //    e:   99                      cltd
 4443     //  [div >= 8 -> offset + 1]
 4444     //  [REX_B]
 4445     //    f:   f7 f9                   idiv   $div
 4446     // 0000000000000011 <done>:
 4447     Label normal;
 4448     Label done;
 4449
 4450     // cmp    $0x80000000,%eax
 4451     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4452
 4453     // jne    e <normal>
 4454     __ jccb(Assembler::notEqual, normal);
 4455
          // rdx is zeroed before the divisor check so that, when the branch to
          // <done> is taken, the remainder is already 0 and rax still holds
          // min_int (the special-case outputs listed above).
 4456     // xor    %edx,%edx
 4457     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4458
 4459     // cmp    $0xffffffffffffffff,$div
 4460     __ cmpl($div$$Register, -1);
 4461
 4462     // je     11 <done>
 4463     __ jccb(Assembler::equal, done);
 4464
 4465     // <normal>
 4466     // cltd
 4467     __ bind(normal);
 4468     __ cdql();
 4469
 4470     // idivl
 4471     // <done>
 4472     __ idivl($div$$Register);
 4473     __ bind(done);
 4474   %}
 4475 
 4476   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4477   %{
 4478     // Full implementation of Java ldiv and lrem; checks for
 4479     // special case as described in JVM spec., p.243 & p.271.
 4480     //
 4481     //         normal case                           special case
 4482     //
 4483     // input : rax: dividend                         min_long
 4484     //         reg: divisor                          -1
 4485     //
 4486     // output: rax: quotient  (= rax idiv reg)       min_long
 4487     //         rdx: remainder (= rax irem reg)       0
 4488     //
 4489     //  Code sequence:
 4490     //
 4491     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4492     //    7:   00 00 80
 4493     //    a:   48 39 d0                cmp    %rdx,%rax
 4494     //    d:   75 08                   jne    17 <normal>
 4495     //    f:   33 d2                   xor    %edx,%edx
 4496     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4497     //   15:   74 05                   je     1c <done>
 4498     // 0000000000000017 <normal>:
 4499     //   17:   48 99                   cqto
 4500     //   19:   48 f7 f9                idiv   $div
 4501     // 000000000000001c <done>:
 4502     Label normal;
 4503     Label done;
 4504
          // rdx is used as scratch for the min_long constant; it is an output
          // of this sequence and is rewritten on both paths below.
 4505     // mov    $0x8000000000000000,%rdx
 4506     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4507
 4508     // cmp    %rdx,%rax
 4509     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4510
 4511     // jne    17 <normal>
 4512     __ jccb(Assembler::notEqual, normal);
 4513
 4514     // xor    %edx,%edx
 4515     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4516
 4517     // cmp    $0xffffffffffffffff,$div
 4518     __ cmpq($div$$Register, -1);
 4519
 4520     // je     1c <done>
 4521     __ jccb(Assembler::equal, done);
 4522
 4523     // <normal>
 4524     // cqto
 4525     __ bind(normal);
 4526     __ cdqq();
 4527
 4528     // idivq
 4529     // <done>
 4530     __ idivq($div$$Register);
 4531     __ bind(done);
 4532   %}
 4533 
 4534   enc_class clear_avx %{
          // off0/off1 exist only in debug builds; they bracket the emitted code so
          // the assert below can compare its size with clear_avx_size().
 4535     DEBUG_ONLY(int off0 = __ offset());
 4536     if (generate_vzeroupper(Compile::current())) {
 4537       // Emitted only when generate_vzeroupper() requests it for this compile:
 4538       // Clear upper bits of YMM registers when current compiled code uses
 4539       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 4540       __ vzeroupper();
 4541     }
 4542     DEBUG_ONLY(int off1 = __ offset());
 4543     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4544   %}
 4545 
 4546   enc_class Java_To_Runtime(method meth) %{
          // Load the runtime entry address into r10, call through r10, then
          // emit the post-call nop.
 4547     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4548     __ call(r10);
 4549     __ post_call_nop();
 4550   %}
 4551 
 4552   enc_class Java_Static_Call(method meth)
 4553   %{
 4554     // JAVA STATIC CALL
 4555     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4556     // determine who we intended to call.
 4557     if (!_method) {
            // No method attached to this node: plain call to the target using a
            // RuntimeAddress; no call relocation or interpreter stub is created.
 4558       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4559     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4560       // The NOP here is purely to ensure that eliding a call to
 4561       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4562       __ nop(5);
 4563       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4564     } else {
            // Regular case: the relocation kind depends on _optimized_virtual.
            // 'mark' and 'call_offset' record the position of the call instruction
            // for the stub code below.
 4565       int method_index = resolved_method_index(masm);
 4566       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4567                                                   : static_call_Relocation::spec(method_index);
 4568       address mark = __ pc();
 4569       int call_offset = __ offset();
 4570       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4571       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4572         // Calls of the same statically bound method can share
 4573         // a stub to the interpreter.
 4574         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4575       } else {
 4576         // Emit stubs for static call.
 4577         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4578         __ clear_inst_mark();
 4579         if (stub == nullptr) {
                // Stub emission failed: record the failure and return early,
                // skipping the post_call_nop() below.
 4580           ciEnv::current()->record_failure("CodeCache is full");
 4581           return;
 4582         }
 4583       }
 4584     }
 4585     __ post_call_nop();
 4586   %}
 4587 
 4588   enc_class Java_Dynamic_Call(method meth) %{
          // Emit the call via ic_call() with the resolved method index, then the
          // post-call nop.
 4589     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4590     __ post_call_nop();
 4591   %}
 4592 
 4593   enc_class call_epilog %{
          // Emits code only when VerifyStackAtCalls is set.
 4594     if (VerifyStackAtCalls) {
 4595       // Check that stack depth is unchanged: find magic cookie (0xbadb100d) on stack
 4596       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4597       Label L;
 4598       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4599       __ jccb(Assembler::equal, L);
 4600       // Die if stack mismatch
 4601       __ int3();
 4602       __ bind(L);
 4603     }
 4604   %}
 4605 
 4606 %}
 4607 
 4608 //----------FRAME--------------------------------------------------------------
 4609 // Definition of frame structure and management information.
 4610 //
 4611 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4612 //                             |   (to get allocators register number
 4613 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4614 //  r   CALLER     |        |
 4615 //  o     |        +--------+      pad to even-align allocators stack-slot
 4616 //  w     V        |  pad0  |        numbers; owned by CALLER
 4617 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4618 //  h     ^        |   in   |  5
 4619 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4620 //  |     |        |        |  3
 4621 //  |     |        +--------+
 4622 //  V     |        | old out|      Empty on Intel, window on Sparc
 4623 //        |    old |preserve|      Must be even aligned.
 4624 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4625 //        |        |   in   |  3   area for Intel ret address
 4626 //     Owned by    |preserve|      Empty on Sparc.
 4627 //       SELF      +--------+
 4628 //        |        |  pad2  |  2   pad to align old SP
 4629 //        |        +--------+  1
 4630 //        |        | locks  |  0
 4631 //        |        +--------+----> OptoReg::stack0(), even aligned
 4632 //        |        |  pad1  | 11   pad to align new SP
 4633 //        |        +--------+
 4634 //        |        |        | 10
 4635 //        |        | spills |  9   spills
 4636 //        V        |        |  8   (pad0 slot for callee)
 4637 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4638 //        ^        |  out   |  7
 4639 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4640 //     Owned by    +--------+
 4641 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4642 //        |    new |preserve|      Must be even-aligned.
 4643 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4644 //        |        |        |
 4645 //
 4646 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4647 //         known from SELF's arguments and the Java calling convention.
 4648 //         Region 6-7 is determined per call site.
 4649 // Note 2: If the calling convention leaves holes in the incoming argument
 4650 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4651 //         are owned by the CALLEE.  Holes should not be necessary in the
 4652 //         incoming area, as the Java calling convention is completely under
 4653 //         the control of the AD file.  Doubles can be sorted and packed to
 4654 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4655 //         varargs C calling conventions.
 4656 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4657 //         even aligned with pad0 as needed.
 4658 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4659 //         region 6-11 is even aligned; it may be padded out more so that
 4660 //         the region from SP to FP meets the minimum stack alignment.
 4661 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4662 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4663 //         SP meets the minimum alignment.
 4664 
 4665 frame
 4666 %{
 4667   // This register defines part of the calling convention
 4668   // between compiled code and the interpreter.
 4669   inline_cache_reg(RAX);                // Inline Cache Register
 4670
 4671   // Optional: name the operand used by cisc-spilling to access
 4672   // [stack_pointer + offset]
 4673   cisc_spilling_operand_name(indOffset32);
 4674
 4675   // Number of stack slots consumed by locking an object
 4676   sync_stack_slots(2);
 4677
 4678   // Compiled code's Frame Pointer
 4679   frame_pointer(RSP);
 4680
 4681   // Stack alignment requirement
 4682   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4683
 4684   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4685   // for calls to C.  Supports the var-args backing area for register parms.
 4686   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4687
 4688   // The after-PROLOG location of the return address.  Location of
 4689   // return address specifies a type (REG or STACK) and a number
 4690   // representing the register number (i.e. - use a register name) or
 4691   // stack slot.
 4692   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4693   // Otherwise, it is above the locks and verification slot and alignment word
 4694   return_addr(STACK - 2 +
 4695               align_up((Compile::current()->in_preserve_stack_slots() +
 4696                         Compile::current()->fixed_slots()),
 4697                        stack_alignment_in_slots()));
 4698
 4699   // Location of compiled Java return values.  Same as C for now.
 4700   return_value
 4701   %{
 4702     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4703            "only return normal values");
 4704
          // lo[] and hi[] are both indexed by ideal_reg and supply the two
          // halves of the OptoRegPair returned below. The per-entry comments
          // name the ideal register type each slot stands for; OptoReg::Bad in
          // hi[] marks entries without a second half.
 4705     static const int lo[Op_RegL + 1] = {
 4706       0,
 4707       0,
 4708       RAX_num,  // Op_RegN
 4709       RAX_num,  // Op_RegI
 4710       RAX_num,  // Op_RegP
 4711       XMM0_num, // Op_RegF
 4712       XMM0_num, // Op_RegD
 4713       RAX_num   // Op_RegL
 4714     };
 4715     static const int hi[Op_RegL + 1] = {
 4716       0,
 4717       0,
 4718       OptoReg::Bad, // Op_RegN
 4719       OptoReg::Bad, // Op_RegI
 4720       RAX_H_num,    // Op_RegP
 4721       OptoReg::Bad, // Op_RegF
 4722       XMM0b_num,    // Op_RegD
 4723       RAX_H_num     // Op_RegL
 4724     };
 4725     // Excluded flags and vector registers.
 4726     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4727     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4728   %}
 4729 %}
 4730 
 4731 //----------ATTRIBUTES---------------------------------------------------------
 4732 //----------Operand Attributes-------------------------------------------------
 4733 op_attrib op_cost(0);        // Required cost attribute
      // NOTE(review): the value in parentheses on each attribute line appears to
      // be the default used when an operand/instruction does not set the
      // attribute itself (operands in this file set op_cost explicitly) - confirm
      // against the ADLC documentation.
 4734
 4735 //----------Instruction Attributes---------------------------------------------
 4736 ins_attrib ins_cost(100);       // Required cost attribute
 4737 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4738 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4739                                 // a non-matching short branch variant
 4740                                 // of some long branch?
 4741 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4742                                 // be a power of 2) specifies the
 4743                                 // alignment that some part of the
 4744                                 // instruction (not necessarily the
 4745                                 // start) requires.  If > 1, a
 4746                                 // compute_padding() function must be
 4747                                 // provided for the instruction
 4748
 4749 // Whether this node is expanded during code emission into a sequence of
 4750 // instructions and the first instruction can perform an implicit null check.
 4751 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4752 
 4753 //----------OPERANDS-----------------------------------------------------------
 4754 // Operand definitions must precede instruction definitions for correct parsing
 4755 // in the ADLC because operands constitute user defined types which are used in
 4756 // instruction definitions.
 4757 
 4758 //----------Simple Operands----------------------------------------------------
 4759 // Immediate Operands
 4760 // Integer Immediate: matches any ConI (no predicate)
 4761 operand immI()
 4762 %{
 4763   match(ConI);
 4764
 4765   op_cost(10);
 4766   format %{ %}
 4767   interface(CONST_INTER);
 4768 %}
 4769 
 4770 // Constant for test vs zero: ConI whose value is 0
 4771 operand immI_0()
 4772 %{
 4773   predicate(n->get_int() == 0);
 4774   match(ConI);
 4775
 4776   op_cost(0);
 4777   format %{ %}
 4778   interface(CONST_INTER);
 4779 %}
 4780 
 4781 // Constant for increment: ConI whose value is 1
 4782 operand immI_1()
 4783 %{
 4784   predicate(n->get_int() == 1);
 4785   match(ConI);
 4786
 4787   op_cost(0);
 4788   format %{ %}
 4789   interface(CONST_INTER);
 4790 %}
 4791 
 4792 // Constant for decrement: ConI whose value is -1
 4793 operand immI_M1()
 4794 %{
 4795   predicate(n->get_int() == -1);
 4796   match(ConI);
 4797
 4798   op_cost(0);
 4799   format %{ %}
 4800   interface(CONST_INTER);
 4801 %}
 4802 
 4803 operand immI_2()
 4804 %{
        // Matches a ConI whose value is exactly 2.
 4805   predicate(n->get_int() == 2);
 4806   match(ConI);
 4807
 4808   op_cost(0);
 4809   format %{ %}
 4810   interface(CONST_INTER);
 4811 %}
 4812 
 4813 operand immI_4()
 4814 %{
        // Matches a ConI whose value is exactly 4.
 4815   predicate(n->get_int() == 4);
 4816   match(ConI);
 4817
 4818   op_cost(0);
 4819   format %{ %}
 4820   interface(CONST_INTER);
 4821 %}
 4822 
 4823 operand immI_8()
 4824 %{
        // Matches a ConI whose value is exactly 8.
 4825   predicate(n->get_int() == 8);
 4826   match(ConI);
 4827
 4828   op_cost(0);
 4829   format %{ %}
 4830   interface(CONST_INTER);
 4831 %}
 4832 
 4833 // Valid scale values for addressing modes: ConI in the range [0, 3]
 4834 operand immI2()
 4835 %{
 4836   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4837   match(ConI);
 4838
 4839   format %{ %}
 4840   interface(CONST_INTER);
 4841 %}
 4842 
 4843 operand immU7()
 4844 %{
        // Matches a ConI in the range [0, 0x7F].
 4845   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4846   match(ConI);
 4847
 4848   op_cost(5);
 4849   format %{ %}
 4850   interface(CONST_INTER);
 4851 %}
 4852 
 4853 operand immI8()
 4854 %{
        // Matches a ConI in the range [-0x80, 0x7F] (upper bound is exclusive 0x80).
 4855   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4856   match(ConI);
 4857
 4858   op_cost(5);
 4859   format %{ %}
 4860   interface(CONST_INTER);
 4861 %}
 4862 
 4863 operand immU8()
 4864 %{
        // Matches a ConI in the range [0, 255].
 4865   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4866   match(ConI);
 4867
 4868   op_cost(5);
 4869   format %{ %}
 4870   interface(CONST_INTER);
 4871 %}
 4872 
 4873 operand immI16()
 4874 %{
        // Matches a ConI in the range [-32768, 32767].
 4875   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4876   match(ConI);
 4877
 4878   op_cost(10);
 4879   format %{ %}
 4880   interface(CONST_INTER);
 4881 %}
 4882 
 4883 // Int Immediate non-negative: ConI whose value is >= 0
 4884 operand immU31()
 4885 %{
 4886   predicate(n->get_int() >= 0);
 4887   match(ConI);
 4888
 4889   op_cost(0);
 4890   format %{ %}
 4891   interface(CONST_INTER);
 4892 %}
 4893 
 4894 // Pointer Immediate: matches any ConP (no predicate)
 4895 operand immP()
 4896 %{
 4897   match(ConP);
 4898
 4899   op_cost(10);
 4900   format %{ %}
 4901   interface(CONST_INTER);
 4902 %}
 4903 
 4904 // Null Pointer Immediate: ConP whose pointer value is 0
 4905 operand immP0()
 4906 %{
 4907   predicate(n->get_ptr() == 0);
 4908   match(ConP);
 4909
 4910   op_cost(5);
 4911   format %{ %}
 4912   interface(CONST_INTER);
 4913 %}
 4914 
 4915 // Narrow Pointer Immediate: matches any ConN (no predicate)
 4916 operand immN() %{
 4917   match(ConN);
 4918
 4919   op_cost(10);
 4920   format %{ %}
 4921   interface(CONST_INTER);
 4922 %}
 4923 
 4924 operand immNKlass() %{
        // Matches any ConNKlass constant (no predicate).
 4925   match(ConNKlass);
 4926
 4927   op_cost(10);
 4928   format %{ %}
 4929   interface(CONST_INTER);
 4930 %}
 4931 
 4932 // Narrow Null Pointer Immediate: ConN whose narrow constant is 0
 4933 operand immN0() %{
 4934   predicate(n->get_narrowcon() == 0);
 4935   match(ConN);
 4936
 4937   op_cost(5);
 4938   format %{ %}
 4939   interface(CONST_INTER);
 4940 %}
 4941 
 4942 operand immP31()
 4943 %{
        // Matches a ConP that carries no relocation (relocInfo::none) and whose
        // pointer value shifted right by 31 is 0.
 4944   predicate(n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none
 4945             && (n->get_ptr() >> 31) == 0);
 4946   match(ConP);
 4947
 4948   op_cost(5);
 4949   format %{ %}
 4950   interface(CONST_INTER);
 4951 %}
 4952 
 4953 
 4954 // Long Immediate: matches any ConL (no predicate)
 4955 operand immL()
 4956 %{
 4957   match(ConL);
 4958
 4959   op_cost(20);
 4960   format %{ %}
 4961   interface(CONST_INTER);
 4962 %}
 4963 
 4964 // Long Immediate 8-bit: ConL in the range [-0x80, 0x7F]
 4965 operand immL8()
 4966 %{
 4967   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4968   match(ConL);
 4969
 4970   op_cost(5);
 4971   format %{ %}
 4972   interface(CONST_INTER);
 4973 %}
 4974 
 4975 // Long Immediate 32-bit unsigned: ConL unchanged by a cast to unsigned int
 4976 operand immUL32()
 4977 %{
 4978   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4979   match(ConL);
 4980
 4981   op_cost(10);
 4982   format %{ %}
 4983   interface(CONST_INTER);
 4984 %}
 4985 
 4986 // Long Immediate 32-bit signed: ConL unchanged by a cast to int
 4987 operand immL32()
 4988 %{
 4989   predicate(n->get_long() == (int) (n->get_long()));
 4990   match(ConL);
 4991
 4992   op_cost(15);
 4993   format %{ %}
 4994   interface(CONST_INTER);
 4995 %}
 4996 
 4997 operand immL_Pow2()
 4998 %{
        // Matches a ConL whose value, cast to julong, satisfies is_power_of_2().
 4999   predicate(is_power_of_2((julong)n->get_long()));
 5000   match(ConL);
 5001
 5002   op_cost(15);
 5003   format %{ %}
 5004   interface(CONST_INTER);
 5005 %}
 5006 
 5007 operand immL_NotPow2()
 5008 %{
        // Matches a ConL whose bitwise complement, cast to julong, satisfies
        // is_power_of_2() - presumably a value with exactly one bit clear.
 5009   predicate(is_power_of_2((julong)~n->get_long()));
 5010   match(ConL);
 5011
 5012   op_cost(15);
 5013   format %{ %}
 5014   interface(CONST_INTER);
 5015 %}
 5016 
 5017 // Long Immediate zero: ConL whose value is 0
 5018 operand immL0()
 5019 %{
 5020   predicate(n->get_long() == 0L);
 5021   match(ConL);
 5022
 5023   op_cost(10);
 5024   format %{ %}
 5025   interface(CONST_INTER);
 5026 %}
 5027 
 5028 // Constant for increment: ConL whose value is 1
 5029 operand immL1()
 5030 %{
 5031   predicate(n->get_long() == 1);
 5032   match(ConL);
 5033
 5034   format %{ %}
 5035   interface(CONST_INTER);
 5036 %}
 5037 
 5038 // Constant for decrement: ConL whose value is -1
 5039 operand immL_M1()
 5040 %{
 5041   predicate(n->get_long() == -1);
 5042   match(ConL);
 5043
 5044   format %{ %}
 5045   interface(CONST_INTER);
 5046 %}
 5047 
 5048 // Long Immediate: low 32-bit mask (ConL whose value is 0xFFFFFFFF)
 5049 operand immL_32bits()
 5050 %{
 5051   predicate(n->get_long() == 0xFFFFFFFFL);
 5052   match(ConL);
 5053   op_cost(20);
 5054
 5055   format %{ %}
 5056   interface(CONST_INTER);
 5057 %}
 5058 
 5059 // Int Immediate: 2^n-1, positive (value > 0 and value + 1 is a power of 2)
 5060 operand immI_Pow2M1()
 5061 %{
        // The + 1 is done on the juint cast of the value.
 5062   predicate((n->get_int() > 0)
 5063             && is_power_of_2((juint)n->get_int() + 1));
 5064   match(ConI);
 5065
 5066   op_cost(20);
 5067   format %{ %}
 5068   interface(CONST_INTER);
 5069 %}
 5070 
 5071 // Float Immediate zero: ConF whose jint_cast() value is 0
 5072 operand immF0()
 5073 %{
        // NOTE(review): the comparison goes through jint_cast rather than a float
        // compare - presumably a bit-pattern check, so that only +0.0f matches;
        // confirm against jint_cast's definition.
 5074   predicate(jint_cast(n->getf()) == 0);
 5075   match(ConF);
 5076
 5077   op_cost(5);
 5078   format %{ %}
 5079   interface(CONST_INTER);
 5080 %}
 5081 
 5082 // Float Immediate: matches any ConF (no predicate)
 5083 operand immF()
 5084 %{
 5085   match(ConF);
 5086
 5087   op_cost(15);
 5088   format %{ %}
 5089   interface(CONST_INTER);
 5090 %}
 5091 
 5092 // Half Float Immediate: matches any ConH (no predicate)
 5093 operand immH()
 5094 %{
 5095   match(ConH);
 5096
 5097   op_cost(15);
 5098   format %{ %}
 5099   interface(CONST_INTER);
 5100 %}
 5101 
 5102 // Double Immediate zero: ConD whose jlong_cast() value is 0
 5103 operand immD0()
 5104 %{
        // NOTE(review): the comparison goes through jlong_cast rather than a double
        // compare - presumably a bit-pattern check, so that only +0.0 matches;
        // confirm against jlong_cast's definition.
 5105   predicate(jlong_cast(n->getd()) == 0);
 5106   match(ConD);
 5107
 5108   op_cost(5);
 5109   format %{ %}
 5110   interface(CONST_INTER);
 5111 %}
 5112 
 5113 // Double Immediate: matches any ConD (no predicate)
 5114 operand immD()
 5115 %{
 5116   match(ConD);
 5117
 5118   op_cost(15);
 5119   format %{ %}
 5120   interface(CONST_INTER);
 5121 %}
 5122 
 5123 // Immediates for special shifts (sign extend)
 5124 
 5125 // Int immediate: the constant 16
      // Matches a ConI node only when its value is exactly 16. Listed under
      // "Immediates for special shifts (sign extend)", so presumably a shift
      // count rather than an increment.
 5126 operand immI_16()
 5127 %{
 5128   predicate(n->get_int() == 16);
 5129   match(ConI);
 5130 
 5131   format %{ %}
 5132   interface(CONST_INTER);
 5133 %}
 5134 
 5135 operand immI_24()
 5136 %{
        // Int immediate: matches a ConI node only when its value is exactly 24.
        // Sits in the "special shifts (sign extend)" group, so presumably a
        // shift count.
 5137   predicate(n->get_int() == 24);
 5138   match(ConI);
 5139 
 5140   format %{ %}
 5141   interface(CONST_INTER);
 5142 %}
 5143 
 5144 // Constant for byte-wide masking
      // Int immediate: matches a ConI node only when its value is 255 (0xFF).
 5145 operand immI_255()
 5146 %{
 5147   predicate(n->get_int() == 255);
 5148   match(ConI);
 5149 
 5150   format %{ %}
 5151   interface(CONST_INTER);
 5152 %}
 5153 
 5154 // Constant for short-wide masking
      // Int immediate: matches a ConI node only when its value is 65535 (0xFFFF).
 5155 operand immI_65535()
 5156 %{
 5157   predicate(n->get_int() == 65535);
 5158   match(ConI);
 5159 
 5160   format %{ %}
 5161   interface(CONST_INTER);
 5162 %}
 5163 
 5164 // Constant for byte-wide masking
      // Long immediate: matches a ConL node only when its value is 255 (0xFF).
 5165 operand immL_255()
 5166 %{
 5167   predicate(n->get_long() == 255);
 5168   match(ConL);
 5169 
 5170   format %{ %}
 5171   interface(CONST_INTER);
 5172 %}
 5173 
 5174 // Constant for short-wide masking
      // Long immediate: matches a ConL node only when its value is 65535 (0xFFFF).
 5175 operand immL_65535()
 5176 %{
 5177   predicate(n->get_long() == 65535);
 5178   match(ConL);
 5179 
 5180   format %{ %}
 5181   interface(CONST_INTER);
 5182 %}
 5183 
 5184 // AOT Runtime Constants Address
      // Pointer immediate: matches a ConP node whose pointer value is accepted
      // by AOTRuntimeConstants::contains(). Declared with op_cost(0).
 5185 operand immAOTRuntimeConstantsAddress()
 5186 %{
 5187   // Check if the address is in the range of AOT Runtime Constants
 5188   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5189   match(ConP);
 5190 
 5191   op_cost(0);
 5192   format %{ %}
 5193   interface(CONST_INTER);
 5194 %}
 5195 
 5196 operand kReg()
 5197 %{
        // Vector mask register operand: matches RegVectMask and is allocated in
        // register class vectmask_reg.
 5198   constraint(ALLOC_IN_RC(vectmask_reg));
 5199   match(RegVectMask);
 5200   format %{%}
 5201   interface(REG_INTER);
 5202 %}
 5203 
 5204 // Register Operands
 5205 // Integer Register
      // General int register operand, allocated in register class int_reg.
      // Besides RegI it also accepts the fixed-register int operands listed
      // below as inputs.
 5206 operand rRegI()
 5207 %{
 5208   constraint(ALLOC_IN_RC(int_reg));
 5209   match(RegI);
 5210 
 5211   match(rax_RegI);
 5212   match(rbx_RegI);
 5213   match(rcx_RegI);
 5214   match(rdx_RegI);
 5215   match(rdi_RegI);
 5216 
 5217   format %{ %}
 5218   interface(REG_INTER);
 5219 %}
 5220 
 5221 // Special Registers
      // Int operand allocated in the single-purpose class int_rax_reg; printed
      // as "RAX". Also accepts a general rRegI input.
 5222 operand rax_RegI()
 5223 %{
 5224   constraint(ALLOC_IN_RC(int_rax_reg));
 5225   match(RegI);
 5226   match(rRegI);
 5227 
 5228   format %{ "RAX" %}
 5229   interface(REG_INTER);
 5230 %}
 5231 
 5232 // Special Registers
      // Int operand allocated in class int_rbx_reg; printed as "RBX".
      // Also accepts a general rRegI input.
 5233 operand rbx_RegI()
 5234 %{
 5235   constraint(ALLOC_IN_RC(int_rbx_reg));
 5236   match(RegI);
 5237   match(rRegI);
 5238 
 5239   format %{ "RBX" %}
 5240   interface(REG_INTER);
 5241 %}
 5242 
 5243 operand rcx_RegI()
 5244 %{
        // Int operand allocated in class int_rcx_reg; printed as "RCX".
        // Also accepts a general rRegI input.
 5245   constraint(ALLOC_IN_RC(int_rcx_reg));
 5246   match(RegI);
 5247   match(rRegI);
 5248 
 5249   format %{ "RCX" %}
 5250   interface(REG_INTER);
 5251 %}
 5252 
 5253 operand rdx_RegI()
 5254 %{
        // Int operand allocated in class int_rdx_reg; printed as "RDX".
        // Also accepts a general rRegI input.
 5255   constraint(ALLOC_IN_RC(int_rdx_reg));
 5256   match(RegI);
 5257   match(rRegI);
 5258 
 5259   format %{ "RDX" %}
 5260   interface(REG_INTER);
 5261 %}
 5262 
 5263 operand rdi_RegI()
 5264 %{
        // Int operand allocated in class int_rdi_reg; printed as "RDI".
        // Also accepts a general rRegI input.
 5265   constraint(ALLOC_IN_RC(int_rdi_reg));
 5266   match(RegI);
 5267   match(rRegI);
 5268 
 5269   format %{ "RDI" %}
 5270   interface(REG_INTER);
 5271 %}
 5272 
 5273 operand no_rax_rdx_RegI()
 5274 %{
        // Int operand allocated in class int_no_rax_rdx_reg (by its name, int
        // registers other than RAX and RDX; the class is defined outside this
        // section). Accepts RegI plus the rbx/rcx/rdi fixed operands; the rax
        // and rdx operands are deliberately not listed.
 5275   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5276   match(RegI);
 5277   match(rbx_RegI);
 5278   match(rcx_RegI);
 5279   match(rdi_RegI);
 5280 
 5281   format %{ %}
 5282   interface(REG_INTER);
 5283 %}
 5284 
 5285 operand no_rbp_r13_RegI()
 5286 %{
        // Int operand allocated in class int_no_rbp_r13_reg (by its name, int
        // registers other than RBP and R13; the class is defined outside this
        // section). Accepts RegI, rRegI and the fixed int operands listed.
 5287   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5288   match(RegI);
 5289   match(rRegI);
 5290   match(rax_RegI);
 5291   match(rbx_RegI);
 5292   match(rcx_RegI);
 5293   match(rdx_RegI);
 5294   match(rdi_RegI);
 5295 
 5296   format %{ %}
 5297   interface(REG_INTER);
 5298 %}
 5299 
 5300 // Pointer Register
      // Widest pointer operand in this section: allocated in class any_reg and
      // accepting RegP, every fixed pointer operand listed below, and rRegP.
      // The memory operands in this file take their base register as any_RegP.
 5301 operand any_RegP()
 5302 %{
 5303   constraint(ALLOC_IN_RC(any_reg));
 5304   match(RegP);
 5305   match(rax_RegP);
 5306   match(rbx_RegP);
 5307   match(rdi_RegP);
 5308   match(rsi_RegP);
 5309   match(rbp_RegP);
 5310   match(r15_RegP);
 5311   match(rRegP);
 5312 
 5313   format %{ %}
 5314   interface(REG_INTER);
 5315 %}
 5316 
 5317 operand rRegP()
 5318 %{
        // General pointer register operand, allocated in class ptr_reg.
        // Accepts RegP and the fixed pointer operands listed, including
        // rbp_RegP and r15_RegP (rationale in the Q&A comment that follows
        // rRegN).
 5319   constraint(ALLOC_IN_RC(ptr_reg));
 5320   match(RegP);
 5321   match(rax_RegP);
 5322   match(rbx_RegP);
 5323   match(rdi_RegP);
 5324   match(rsi_RegP);
 5325   match(rbp_RegP);  // See Q&A below about
 5326   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5327 
 5328   format %{ %}
 5329   interface(REG_INTER);
 5330 %}
 5331 
 5332 operand rRegN() %{
        // Narrow (compressed) pointer register operand: matches RegN and is
        // allocated in the int register class int_reg.
 5333   constraint(ALLOC_IN_RC(int_reg));
 5334   match(RegN);
 5335 
 5336   format %{ %}
 5337   interface(REG_INTER);
 5338 %}
 5339 
 5340 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5341 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5342 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 5343 // The output of an instruction is controlled by the allocator, which respects
 5344 // register class masks, not match rules.  Unless an instruction mentions
 5345 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 5346 // by the allocator as an input.
 5347 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 5348 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5349 // result, RBP is not included in the output of the instruction either.
 5350 
 5351 // This operand is not allowed to use RBP even if
 5352 // RBP is not used to hold the frame pointer.
      // Allocated in class ptr_reg_no_rbp. Accepts RegP and the rbx/rsi/rdi
      // fixed pointer operands; rbp_RegP is not among the accepted inputs.
 5353 operand no_rbp_RegP()
 5354 %{
 5355   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5356   match(RegP);
 5357   match(rbx_RegP);
 5358   match(rsi_RegP);
 5359   match(rdi_RegP);
 5360 
 5361   format %{ %}
 5362   interface(REG_INTER);
 5363 %}
 5364 
 5365 // Special Registers
 5366 // Return a pointer value
      // Pointer operand allocated in class ptr_rax_reg; also accepts a general
      // rRegP input.
 5367 operand rax_RegP()
 5368 %{
 5369   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5370   match(RegP);
 5371   match(rRegP);
 5372 
 5373   format %{ %}
 5374   interface(REG_INTER);
 5375 %}
 5376 
 5377 // Special Registers
 5378 // Return a compressed pointer value
      // Narrow pointer operand allocated in class int_rax_reg (the same class
      // rax_RegI uses); also accepts a general rRegN input.
 5379 operand rax_RegN()
 5380 %{
 5381   constraint(ALLOC_IN_RC(int_rax_reg));
 5382   match(RegN);
 5383   match(rRegN);
 5384 
 5385   format %{ %}
 5386   interface(REG_INTER);
 5387 %}
 5388 
 5389 // Used in AtomicAdd
      // Pointer operand allocated in class ptr_rbx_reg; also accepts a general
      // rRegP input.
 5390 operand rbx_RegP()
 5391 %{
 5392   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5393   match(RegP);
 5394   match(rRegP);
 5395 
 5396   format %{ %}
 5397   interface(REG_INTER);
 5398 %}
 5399 
 5400 operand rsi_RegP()
 5401 %{
        // Pointer operand allocated in class ptr_rsi_reg; also accepts a general
        // rRegP input.
 5402   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5403   match(RegP);
 5404   match(rRegP);
 5405 
 5406   format %{ %}
 5407   interface(REG_INTER);
 5408 %}
 5409 
 5410 operand rbp_RegP()
 5411 %{
        // Pointer operand allocated in class ptr_rbp_reg; also accepts a general
        // rRegP input. See the Q&A comment after rRegN for how RBP relates to
        // ptr_reg when PreserveFramePointer is set.
 5412   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5413   match(RegP);
 5414   match(rRegP);
 5415 
 5416   format %{ %}
 5417   interface(REG_INTER);
 5418 %}
 5419 
 5420 // Used in rep stosq
      // Pointer operand allocated in class ptr_rdi_reg; also accepts a general
      // rRegP input.
 5421 operand rdi_RegP()
 5422 %{
 5423   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5424   match(RegP);
 5425   match(rRegP);
 5426 
 5427   format %{ %}
 5428   interface(REG_INTER);
 5429 %}
 5430 
 5431 operand r15_RegP()
 5432 %{
        // Pointer operand allocated in class ptr_r15_reg; also accepts a general
        // rRegP input. The Q&A comment after rRegN describes r15 as the
        // read-only TLS register.
 5433   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5434   match(RegP);
 5435   match(rRegP);
 5436 
 5437   format %{ %}
 5438   interface(REG_INTER);
 5439 %}
 5440 
 5441 operand rRegL()
 5442 %{
        // General long register operand, allocated in class long_reg.
        // Accepts RegL plus the rax_RegL and rdx_RegL fixed operands.
 5443   constraint(ALLOC_IN_RC(long_reg));
 5444   match(RegL);
 5445   match(rax_RegL);
 5446   match(rdx_RegL);
 5447 
 5448   format %{ %}
 5449   interface(REG_INTER);
 5450 %}
 5451 
 5452 // Special Registers
      // Long operand allocated in class long_no_rax_rdx_reg (by its name, long
      // registers other than RAX and RDX; the class is defined outside this
      // section). Accepts RegL and a general rRegL input.
 5453 operand no_rax_rdx_RegL()
 5454 %{
 5455   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5456   match(RegL);
 5457   match(rRegL);
 5458 
 5459   format %{ %}
 5460   interface(REG_INTER);
 5461 %}
 5462 
 5463 operand rax_RegL()
 5464 %{
        // Long operand allocated in class long_rax_reg; printed as "RAX".
        // Also accepts a general rRegL input.
 5465   constraint(ALLOC_IN_RC(long_rax_reg));
 5466   match(RegL);
 5467   match(rRegL);
 5468 
 5469   format %{ "RAX" %}
 5470   interface(REG_INTER);
 5471 %}
 5472 
 5473 operand rcx_RegL()
 5474 %{
        // Long operand allocated in class long_rcx_reg; also accepts a general
        // rRegL input.
 5475   constraint(ALLOC_IN_RC(long_rcx_reg));
 5476   match(RegL);
 5477   match(rRegL);
 5478 
 5479   format %{ %}
 5480   interface(REG_INTER);
 5481 %}
 5482 
 5483 operand rdx_RegL()
 5484 %{
        // Long operand allocated in class long_rdx_reg; also accepts a general
        // rRegL input.
 5485   constraint(ALLOC_IN_RC(long_rdx_reg));
 5486   match(RegL);
 5487   match(rRegL);
 5488 
 5489   format %{ %}
 5490   interface(REG_INTER);
 5491 %}
 5492 
 5493 operand r11_RegL()
 5494 %{
        // Long operand allocated in class long_r11_reg; also accepts a general
        // rRegL input.
 5495   constraint(ALLOC_IN_RC(long_r11_reg));
 5496   match(RegL);
 5497   match(rRegL);
 5498 
 5499   format %{ %}
 5500   interface(REG_INTER);
 5501 %}
 5502 
 5503 operand no_rbp_r13_RegL()
 5504 %{
        // Long operand allocated in class long_no_rbp_r13_reg (by its name, long
        // registers other than RBP and R13; the class is defined outside this
        // section). Accepts RegL, rRegL and the rax/rcx/rdx fixed operands.
 5505   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5506   match(RegL);
 5507   match(rRegL);
 5508   match(rax_RegL);
 5509   match(rcx_RegL);
 5510   match(rdx_RegL);
 5511 
 5512   format %{ %}
 5513   interface(REG_INTER);
 5514 %}
 5515 
 5516 // Flags register, used as output of compare instructions
      // Matches RegFlags, allocated in class int_flags; printed as "RFLAGS".
 5517 operand rFlagsReg()
 5518 %{
 5519   constraint(ALLOC_IN_RC(int_flags));
 5520   match(RegFlags);
 5521 
 5522   format %{ "RFLAGS" %}
 5523   interface(REG_INTER);
 5524 %}
 5525 
 5526 // Flags register, used as output of FLOATING POINT compare instructions
      // Same register class (int_flags) and match (RegFlags) as rFlagsReg; it is
      // a distinct operand type, printed as "RFLAGS_U".
      // NOTE(review): the "U" suffix and the comment on cmpOpU suggest this is
      // also the flags type for unsigned integer compares -- confirm.
 5527 operand rFlagsRegU()
 5528 %{
 5529   constraint(ALLOC_IN_RC(int_flags));
 5530   match(RegFlags);
 5531 
 5532   format %{ "RFLAGS_U" %}
 5533   interface(REG_INTER);
 5534 %}
 5535 
 5536 operand rFlagsRegUCF() %{
        // Flags operand printed as "RFLAGS_U_CF". Only available when APX is
        // disabled or AVX10.2 is unsupported; the predicate is the exact
        // complement of the one on rFlagsRegUCFE.
 5537   constraint(ALLOC_IN_RC(int_flags));
 5538   match(RegFlags);
 5539   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5540 
 5541   format %{ "RFLAGS_U_CF" %}
 5542   interface(REG_INTER);
 5543 %}
 5544 
 5545 operand rFlagsRegUCFE() %{
        // Flags operand printed as "RFLAGS_U_CFE". Only available when UseAPX is
        // set and the CPU supports AVX10.2; the predicate is the exact
        // complement of the one on rFlagsRegUCF.
 5546   constraint(ALLOC_IN_RC(int_flags));
 5547   match(RegFlags);
 5548   predicate(UseAPX && VM_Version::supports_avx10_2());
 5549 
 5550   format %{ "RFLAGS_U_CFE" %}
 5551   interface(REG_INTER);
 5552 %}
 5553 
 5554 // Float register operand: matches RegF, allocated in class float_reg
 5555 operand regF() %{
 5556    constraint(ALLOC_IN_RC(float_reg));
 5557    match(RegF);
 5558 
 5559    format %{ %}
 5560    interface(REG_INTER);
 5561 %}
 5562 
 5563 // Float register operand: matches RegF, allocated in class float_reg_legacy
 5564 operand legRegF() %{
 5565    constraint(ALLOC_IN_RC(float_reg_legacy));
 5566    match(RegF);
 5567 
 5568    format %{ %}
 5569    interface(REG_INTER);
 5570 %}
 5571 
 5572 // Float register operand: matches RegF, allocated in class float_reg_vl
 5573 operand vlRegF() %{
 5574    constraint(ALLOC_IN_RC(float_reg_vl));
 5575    match(RegF);
 5576 
 5577    format %{ %}
 5578    interface(REG_INTER);
 5579 %}
 5580 
 5581 // Double register operand: matches RegD, allocated in class double_reg
 5582 operand regD() %{
 5583    constraint(ALLOC_IN_RC(double_reg));
 5584    match(RegD);
 5585 
 5586    format %{ %}
 5587    interface(REG_INTER);
 5588 %}
 5589 
 5590 // Double register operand: matches RegD, allocated in class double_reg_legacy
 5591 operand legRegD() %{
 5592    constraint(ALLOC_IN_RC(double_reg_legacy));
 5593    match(RegD);
 5594 
 5595    format %{ %}
 5596    interface(REG_INTER);
 5597 %}
 5598 
 5599 // Double register operand: matches RegD, allocated in class double_reg_vl
 5600 operand vlRegD() %{
 5601    constraint(ALLOC_IN_RC(double_reg_vl));
 5602    match(RegD);
 5603 
 5604    format %{ %}
 5605    interface(REG_INTER);
 5606 %}
 5607 
 5608 //----------Memory Operands----------------------------------------------------
 5609 // Direct Memory Operand
 5610 // operand direct(immP addr)
 5611 // %{
 5612 //   match(addr);
 5613 
 5614 //   format %{ "[$addr]" %}
 5615 //   interface(MEMORY_INTER) %{
 5616 //     base(0xFFFFFFFF);
 5617 //     index(0x4);
 5618 //     scale(0x0);
 5619 //     disp($addr);
 5620 //   %}
 5621 // %}
 5622 
 5623 // Indirect Memory Operand
      // Address form [reg]: the pointer register itself is the address.
 5624 operand indirect(any_RegP reg)
 5625 %{
 5626   constraint(ALLOC_IN_RC(ptr_reg));
 5627   match(reg);
 5628 
 5629   format %{ "[$reg]" %}
 5630   interface(MEMORY_INTER) %{
 5631     base($reg);
 5632     index(0x4);  // No index (same encoding the stackSlot operands label "No Index")
 5633     scale(0x0);
 5634     disp(0x0);
 5635   %}
 5636 %}
 5637 
 5638 // Indirect Memory Plus Short Offset Operand
      // Address form [reg + off] where off is an immL8 constant.
 5639 operand indOffset8(any_RegP reg, immL8 off)
 5640 %{
 5641   constraint(ALLOC_IN_RC(ptr_reg));
 5642   match(AddP reg off);
 5643 
 5644   format %{ "[$reg + $off (8-bit)]" %}
 5645   interface(MEMORY_INTER) %{
 5646     base($reg);
 5647     index(0x4);  // No index
 5648     scale(0x0);
 5649     disp($off);
 5650   %}
 5651 %}
 5652 
 5653 // Indirect Memory Plus Long Offset Operand
      // Address form [reg + off] where off is an immL32 constant.
 5654 operand indOffset32(any_RegP reg, immL32 off)
 5655 %{
 5656   constraint(ALLOC_IN_RC(ptr_reg));
 5657   match(AddP reg off);
 5658 
 5659   format %{ "[$reg + $off (32-bit)]" %}
 5660   interface(MEMORY_INTER) %{
 5661     base($reg);
 5662     index(0x4);  // No index
 5663     scale(0x0);
 5664     disp($off);
 5665   %}
 5666 %}
 5667 
 5668 // Indirect Memory Plus Index Register Plus Offset Operand
      // Address form [reg + lreg + off]: long index register, unscaled, plus an
      // immL32 displacement.
 5669 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5670 %{
 5671   constraint(ALLOC_IN_RC(ptr_reg));
 5672   match(AddP (AddP reg lreg) off);
 5673 
 5674   op_cost(10);
 5675   format %{"[$reg + $off + $lreg]" %}
 5676   interface(MEMORY_INTER) %{
 5677     base($reg);
 5678     index($lreg);
 5679     scale(0x0);
 5680     disp($off);
 5681   %}
 5682 %}
 5683 
 5684 // Indirect Memory Plus Index Register Operand
      // Address form [reg + lreg]: long index register, unscaled, no
      // displacement.
 5685 operand indIndex(any_RegP reg, rRegL lreg)
 5686 %{
 5687   constraint(ALLOC_IN_RC(ptr_reg));
 5688   match(AddP reg lreg);
 5689 
 5690   op_cost(10);
 5691   format %{"[$reg + $lreg]" %}
 5692   interface(MEMORY_INTER) %{
 5693     base($reg);
 5694     index($lreg);
 5695     scale(0x0);
 5696     disp(0x0);
 5697   %}
 5698 %}
 5699 
 5700 // Indirect Memory Times Scale Plus Index Register
      // Address form [reg + (lreg << scale)]: the left shift of the long index
      // by an immI2 constant is folded into the address's scale field.
 5701 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5702 %{
 5703   constraint(ALLOC_IN_RC(ptr_reg));
 5704   match(AddP reg (LShiftL lreg scale));
 5705 
 5706   op_cost(10);
 5707   format %{"[$reg + $lreg << $scale]" %}
 5708   interface(MEMORY_INTER) %{
 5709     base($reg);
 5710     index($lreg);
 5711     scale($scale);
 5712     disp(0x0);
 5713   %}
 5714 %}
 5715 
 5716 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5717 %{
        // Indirect Memory Times Scale Plus Positive Index Register
        // Address form [reg + (ConvI2L(idx) << scale)], with the int register
        // used directly as the index. The predicate requires the long type at
        // n->in(3)->in(1) (presumably the ConvI2L of the match rule) to have a
        // lower bound >= 0, i.e. the index is known to be non-negative.
 5718   constraint(ALLOC_IN_RC(ptr_reg));
 5719   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5720   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5721 
 5722   op_cost(10);
 5723   format %{"[$reg + pos $idx << $scale]" %}
 5724   interface(MEMORY_INTER) %{
 5725     base($reg);
 5726     index($idx);
 5727     scale($scale);
 5728     disp(0x0);
 5729   %}
 5730 %}
 5731 
 5732 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
      // Address form [reg + (lreg << scale) + off]: base, scaled long index and
      // immL32 displacement.
 5733 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5734 %{
 5735   constraint(ALLOC_IN_RC(ptr_reg));
 5736   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5737 
 5738   op_cost(10);
 5739   format %{"[$reg + $off + $lreg << $scale]" %}
 5740   interface(MEMORY_INTER) %{
 5741     base($reg);
 5742     index($lreg);
 5743     scale($scale);
 5744     disp($off);
 5745   %}
 5746 %}
 5747 
 5748 // Indirect Memory Plus Positive Index Register Plus Offset Operand
      // Address form [reg + ConvI2L(idx) + off], with the int register used
      // directly as the (unscaled) index. The predicate requires the long type
      // at n->in(2)->in(3) (presumably the ConvI2L of the match rule) to have a
      // lower bound >= 0, i.e. the index is known to be non-negative.
 5749 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5750 %{
 5751   constraint(ALLOC_IN_RC(ptr_reg));
 5752   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5753   match(AddP (AddP reg (ConvI2L idx)) off);
 5754 
 5755   op_cost(10);
 5756   format %{"[$reg + $off + $idx]" %}
 5757   interface(MEMORY_INTER) %{
 5758     base($reg);
 5759     index($idx);
 5760     scale(0x0);
 5761     disp($off);
 5762   %}
 5763 %}
 5764 
 5765 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
      // Address form [reg + (ConvI2L(idx) << scale) + off], with the int
      // register used directly as the index. The predicate requires the long
      // type at n->in(2)->in(3)->in(1) (presumably the ConvI2L of the match
      // rule) to have a lower bound >= 0, i.e. a non-negative index.
 5766 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5767 %{
 5768   constraint(ALLOC_IN_RC(ptr_reg));
 5769   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5770   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5771 
 5772   op_cost(10);
 5773   format %{"[$reg + $off + $idx << $scale]" %}
 5774   interface(MEMORY_INTER) %{
 5775     base($reg);
 5776     index($idx);
 5777     scale($scale);
 5778     disp($off);
 5779   %}
 5780 %}
 5781 
 5782 // Indirect Narrow Oop Plus Offset Operand
 5783 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 5784 // we can't free r12 even with CompressedOops::base() == nullptr.
      // Folds the DecodeN into the address: R12 is the base, the narrow oop
      // register is the index scaled by 8 (scale field 3), plus an immL32
      // displacement. Only applies when compressed oops are in use with a
      // shift equal to Address::times_8.
 5785 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5786   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5787   constraint(ALLOC_IN_RC(ptr_reg));
 5788   match(AddP (DecodeN reg) off);
 5789 
 5790   op_cost(10);
 5791   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5792   interface(MEMORY_INTER) %{
 5793     base(0xc); // R12
 5794     index($reg);
 5795     scale(0x3);
 5796     disp($off);
 5797   %}
 5798 %}
 5799 
 5800 // Indirect Memory Operand (narrow oop base)
      // Address form [reg] where reg holds a narrow oop. Only applies when
      // CompressedOops::shift() == 0, in which case the DecodeN is folded away
      // and the narrow register is used directly as the base.
 5801 operand indirectNarrow(rRegN reg)
 5802 %{
 5803   predicate(CompressedOops::shift() == 0);
 5804   constraint(ALLOC_IN_RC(ptr_reg));
 5805   match(DecodeN reg);
 5806 
 5807   format %{ "[$reg]" %}
 5808   interface(MEMORY_INTER) %{
 5809     base($reg);
 5810     index(0x4);  // No index
 5811     scale(0x0);
 5812     disp(0x0);
 5813   %}
 5814 %}
 5815 
 5816 // Indirect Memory Plus Short Offset Operand (narrow oop base)
      // Address form [reg + off] with an immL8 offset; the narrow oop register
      // is the base. Only applies when CompressedOops::shift() == 0.
 5817 operand indOffset8Narrow(rRegN reg, immL8 off)
 5818 %{
 5819   predicate(CompressedOops::shift() == 0);
 5820   constraint(ALLOC_IN_RC(ptr_reg));
 5821   match(AddP (DecodeN reg) off);
 5822 
 5823   format %{ "[$reg + $off (8-bit)]" %}
 5824   interface(MEMORY_INTER) %{
 5825     base($reg);
 5826     index(0x4);  // No index
 5827     scale(0x0);
 5828     disp($off);
 5829   %}
 5830 %}
 5831 
 5832 // Indirect Memory Plus Long Offset Operand (narrow oop base)
      // Address form [reg + off] with an immL32 offset; the narrow oop register
      // is the base. Only applies when CompressedOops::shift() == 0.
 5833 operand indOffset32Narrow(rRegN reg, immL32 off)
 5834 %{
 5835   predicate(CompressedOops::shift() == 0);
 5836   constraint(ALLOC_IN_RC(ptr_reg));
 5837   match(AddP (DecodeN reg) off);
 5838 
 5839   format %{ "[$reg + $off (32-bit)]" %}
 5840   interface(MEMORY_INTER) %{
 5841     base($reg);
 5842     index(0x4);  // No index
 5843     scale(0x0);
 5844     disp($off);
 5845   %}
 5846 %}
 5847 
 5848 // Indirect Memory Plus Index Register Plus Offset Operand (narrow oop base)
      // Address form [reg + lreg + off]; the narrow oop register is the base.
      // Only applies when CompressedOops::shift() == 0.
 5849 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5850 %{
 5851   predicate(CompressedOops::shift() == 0);
 5852   constraint(ALLOC_IN_RC(ptr_reg));
 5853   match(AddP (AddP (DecodeN reg) lreg) off);
 5854 
 5855   op_cost(10);
 5856   format %{"[$reg + $off + $lreg]" %}
 5857   interface(MEMORY_INTER) %{
 5858     base($reg);
 5859     index($lreg);
 5860     scale(0x0);
 5861     disp($off);
 5862   %}
 5863 %}
 5864 
 5865 // Indirect Memory Plus Index Register Operand (narrow oop base)
      // Address form [reg + lreg], no displacement; the narrow oop register is
      // the base. Only applies when CompressedOops::shift() == 0.
 5866 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5867 %{
 5868   predicate(CompressedOops::shift() == 0);
 5869   constraint(ALLOC_IN_RC(ptr_reg));
 5870   match(AddP (DecodeN reg) lreg);
 5871 
 5872   op_cost(10);
 5873   format %{"[$reg + $lreg]" %}
 5874   interface(MEMORY_INTER) %{
 5875     base($reg);
 5876     index($lreg);
 5877     scale(0x0);
 5878     disp(0x0);
 5879   %}
 5880 %}
 5881 
 5882 // Indirect Memory Times Scale Plus Index Register (narrow oop base)
      // Address form [reg + (lreg << scale)]; the narrow oop register is the
      // base. Only applies when CompressedOops::shift() == 0.
 5883 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5884 %{
 5885   predicate(CompressedOops::shift() == 0);
 5886   constraint(ALLOC_IN_RC(ptr_reg));
 5887   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5888 
 5889   op_cost(10);
 5890   format %{"[$reg + $lreg << $scale]" %}
 5891   interface(MEMORY_INTER) %{
 5892     base($reg);
 5893     index($lreg);
 5894     scale($scale);
 5895     disp(0x0);
 5896   %}
 5897 %}
 5898 
 5899 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand (narrow oop base)
      // Address form [reg + (lreg << scale) + off]; the narrow oop register is
      // the base. Only applies when CompressedOops::shift() == 0.
 5900 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5901 %{
 5902   predicate(CompressedOops::shift() == 0);
 5903   constraint(ALLOC_IN_RC(ptr_reg));
 5904   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5905 
 5906   op_cost(10);
 5907   format %{"[$reg + $off + $lreg << $scale]" %}
 5908   interface(MEMORY_INTER) %{
 5909     base($reg);
 5910     index($lreg);
 5911     scale($scale);
 5912     disp($off);
 5913   %}
 5914 %}
 5915 
 5916 // Indirect Memory Plus Positive Index Register Plus Offset Operand (narrow oop base)
      // Address form [reg + ConvI2L(idx) + off]; the narrow oop register is the
      // base and the int register is used directly as the index. Requires
      // CompressedOops::shift() == 0 and the long type at n->in(2)->in(3)
      // (presumably the ConvI2L of the match rule) to have lower bound >= 0.
 5917 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5918 %{
 5919   constraint(ALLOC_IN_RC(ptr_reg));
 5920   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5921   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5922 
 5923   op_cost(10);
 5924   format %{"[$reg + $off + $idx]" %}
 5925   interface(MEMORY_INTER) %{
 5926     base($reg);
 5927     index($idx);
 5928     scale(0x0);
 5929     disp($off);
 5930   %}
 5931 %}
 5932 
 5933 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (narrow oop base)
      // Address form [reg + (ConvI2L(idx) << scale) + off]; the narrow oop
      // register is the base and the int register is used directly as the
      // index. Requires CompressedOops::shift() == 0 and the long type at
      // n->in(2)->in(3)->in(1) (presumably the ConvI2L of the match rule) to
      // have lower bound >= 0.
 5934 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5935 %{
 5936   constraint(ALLOC_IN_RC(ptr_reg));
 5937   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5938   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5939 
 5940   op_cost(10);
 5941   format %{"[$reg + $off + $idx << $scale]" %}
 5942   interface(MEMORY_INTER) %{
 5943     base($reg);
 5944     index($idx);
 5945     scale($scale);
 5946     disp($off);
 5947   %}
 5948 %}
 5949 
 5950 //----------Special Memory Operands--------------------------------------------
 5951 // Stack Slot Operand - This operand is used for loading and storing temporary
 5952 //                      values on the stack where a match requires a value to
 5953 //                      flow through memory.
      // Pointer variant: an sRegP stack slot addressed as RSP + stack offset.
 5954 operand stackSlotP(sRegP reg)
 5955 %{
 5956   constraint(ALLOC_IN_RC(stack_slots));
 5957   // No match rule because this operand is only generated in matching
 5958 
 5959   format %{ "[$reg]" %}
 5960   interface(MEMORY_INTER) %{
 5961     base(0x4);   // RSP
 5962     index(0x4);  // No Index
 5963     scale(0x0);  // No Scale
 5964     disp($reg);  // Stack Offset
 5965   %}
 5966 %}
 5967 
 5968 operand stackSlotI(sRegI reg)
 5969 %{
        // Stack slot operand, int variant: an sRegI stack slot addressed as
        // RSP + stack offset.
 5970   constraint(ALLOC_IN_RC(stack_slots));
 5971   // No match rule because this operand is only generated in matching
 5972 
 5973   format %{ "[$reg]" %}
 5974   interface(MEMORY_INTER) %{
 5975     base(0x4);   // RSP
 5976     index(0x4);  // No Index
 5977     scale(0x0);  // No Scale
 5978     disp($reg);  // Stack Offset
 5979   %}
 5980 %}
 5981 
 5982 operand stackSlotF(sRegF reg)
 5983 %{
        // Stack slot operand, float variant: an sRegF stack slot addressed as
        // RSP + stack offset.
 5984   constraint(ALLOC_IN_RC(stack_slots));
 5985   // No match rule because this operand is only generated in matching
 5986 
 5987   format %{ "[$reg]" %}
 5988   interface(MEMORY_INTER) %{
 5989     base(0x4);   // RSP
 5990     index(0x4);  // No Index
 5991     scale(0x0);  // No Scale
 5992     disp($reg);  // Stack Offset
 5993   %}
 5994 %}
 5995 
 5996 operand stackSlotD(sRegD reg)
 5997 %{
        // Stack slot operand, double variant: an sRegD stack slot addressed as
        // RSP + stack offset.
 5998   constraint(ALLOC_IN_RC(stack_slots));
 5999   // No match rule because this operand is only generated in matching
 6000 
 6001   format %{ "[$reg]" %}
 6002   interface(MEMORY_INTER) %{
 6003     base(0x4);   // RSP
 6004     index(0x4);  // No Index
 6005     scale(0x0);  // No Scale
 6006     disp($reg);  // Stack Offset
 6007   %}
 6008 %}
 6009 operand stackSlotL(sRegL reg)
 6010 %{
        // Stack slot operand, long variant: an sRegL stack slot addressed as
        // RSP + stack offset.
 6011   constraint(ALLOC_IN_RC(stack_slots));
 6012   // No match rule because this operand is only generated in matching
 6013 
 6014   format %{ "[$reg]" %}
 6015   interface(MEMORY_INTER) %{
 6016     base(0x4);   // RSP
 6017     index(0x4);  // No Index
 6018     scale(0x0);  // No Scale
 6019     disp($reg);  // Stack Offset
 6020   %}
 6021 %}
 6022 
 6023 //----------Conditional Branch Operands----------------------------------------
 6024 // Comparison Op  - This is the operation of the comparison, and is limited to
 6025 //                  the following set of codes:
 6026 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6027 //
 6028 // Other attributes of the comparison, such as unsignedness, are specified
 6029 // by the comparison instruction that sets a condition code flags register.
 6030 // That result is represented by a flags operand whose subtype is appropriate
 6031 // to the unsignedness (etc.) of the comparison.
 6032 //
 6033 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6034 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6035 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 6036 
 6037 // Comparison Code (signed compare)
      // Matches a Bool node. Each entry pairs a comparison with its condition
      // code value and the condition suffix used when printing; the relational
      // tests use the signed conditions l/ge/le/g.
 6038 operand cmpOp()
 6039 %{
 6040   match(Bool);
 6041 
 6042   format %{ "" %}
 6043   interface(COND_INTER) %{
 6044     equal(0x4, "e");
 6045     not_equal(0x5, "ne");
 6046     less(0xc, "l");
 6047     greater_equal(0xd, "ge");
 6048     less_equal(0xe, "le");
 6049     greater(0xf, "g");
 6050     overflow(0x0, "o");
 6051     no_overflow(0x1, "no");
 6052   %}
 6053 %}
 6054 
 6055 // Comparison Code, unsigned compare.  Used by FP also, with
 6056 // C2 (unordered) turned into GT or LT already.  The other bits
 6057 // C0 and C3 are turned into Carry & Zero flags.
      // Same table as cmpOp except that the relational tests use the unsigned
      // conditions b/ae/be/a.
 6058 operand cmpOpU()
 6059 %{
 6060   match(Bool);
 6061 
 6062   format %{ "" %}
 6063   interface(COND_INTER) %{
 6064     equal(0x4, "e");
 6065     not_equal(0x5, "ne");
 6066     less(0x2, "b");
 6067     greater_equal(0x3, "ae");
 6068     less_equal(0x6, "be");
 6069     greater(0x7, "a");
 6070     overflow(0x0, "o");
 6071     no_overflow(0x1, "no");
 6072   %}
 6073 %}
 6074 
 6075 
 6076 // Floating comparisons that don't require any fixup for the unordered case,
 6077 // If both inputs of the comparison are the same, ZF is always set so we
 6078 // don't need to use cmpOpUCF2 for eq/ne
      // Applies only when APX is disabled or AVX10.2 is unsupported. Accepts
      // lt/ge/le/gt tests, plus any test whose compare has identical inputs
      // (n->in(1)->in(1) == n->in(1)->in(2)). In this table eq/ne are mapped to
      // the parity conditions np/p rather than e/ne.
 6079 operand cmpOpUCF() %{
 6080   match(Bool);
 6081   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6082             (n->as_Bool()->_test._test == BoolTest::lt ||
 6083              n->as_Bool()->_test._test == BoolTest::ge ||
 6084              n->as_Bool()->_test._test == BoolTest::le ||
 6085              n->as_Bool()->_test._test == BoolTest::gt ||
 6086              n->in(1)->in(1) == n->in(1)->in(2)));
 6087   format %{ "" %}
 6088   interface(COND_INTER) %{
 6089     equal(0xb, "np");
 6090     not_equal(0xa, "p");
 6091     less(0x2, "b");
 6092     greater_equal(0x3, "ae");
 6093     less_equal(0x6, "be");
 6094     greater(0x7, "a");
 6095     overflow(0x0, "o");
 6096     no_overflow(0x1, "no");
 6097   %}
 6098 %}
 6099 
 6100 
 6101 // Floating comparisons that can be fixed up with extra conditional jumps
      // Applies only when APX is disabled or AVX10.2 is unsupported, and only to
      // eq/ne tests whose compare inputs are different nodes (the cases that
      // cmpOpUCF does not accept). Uses the plain e/ne conditions.
 6102 operand cmpOpUCF2() %{
 6103   match(Bool);
 6104   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6105             (n->as_Bool()->_test._test == BoolTest::ne ||
 6106              n->as_Bool()->_test._test == BoolTest::eq) &&
 6107             n->in(1)->in(1) != n->in(1)->in(2));
 6108   format %{ "" %}
 6109   interface(COND_INTER) %{
 6110     equal(0x4, "e");
 6111     not_equal(0x5, "ne");
 6112     less(0x2, "b");
 6113     greater_equal(0x3, "ae");
 6114     less_equal(0x6, "be");
 6115     greater(0x7, "a");
 6116     overflow(0x0, "o");
 6117     no_overflow(0x1, "no");
 6118   %}
 6119 %}
 6120 
 6121 
 6122 // Floating point comparisons that set condition flags to test more directly,
 6123 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
 6124 // are used for L (<) and LE (<=) conditions. It's important to convert these
 6125 // latter conditions to ones that use unsigned tests before passing into an
 6126 // instruction because the preceding comparison might be based on a three way
 6127 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
      // Applies only when UseAPX is set and the CPU supports AVX10.2; accepts
      // the ne/eq/lt/ge/le/gt tests.
      // NOTE(review): the text above mentions signed tests for L/LE, while the
      // table below encodes less/less_equal with the unsigned conditions b/be
      // -- confirm which is intended.
 6128 operand cmpOpUCFE()
 6129 %{
 6130   match(Bool);
 6131   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6132             (n->as_Bool()->_test._test == BoolTest::ne ||
 6133              n->as_Bool()->_test._test == BoolTest::eq ||
 6134              n->as_Bool()->_test._test == BoolTest::lt ||
 6135              n->as_Bool()->_test._test == BoolTest::ge ||
 6136              n->as_Bool()->_test._test == BoolTest::le ||
 6137              n->as_Bool()->_test._test == BoolTest::gt));
 6138 
 6139   format %{ "" %}
 6140   interface(COND_INTER) %{
 6141     equal(0x4, "e");
 6142     not_equal(0x5, "ne");
 6143     less(0x2, "b");
 6144     greater_equal(0x3, "ae");
 6145     less_equal(0x6, "be");
 6146     greater(0x7, "a");
 6147     overflow(0x0, "o");
 6148     no_overflow(0x1, "no");
 6149   %}
 6150 %}
 6151 
 6152 // Operands for bound floating point register arguments
      // Matches VecX and is allocated in the class xmm0_reg.
 6153 operand rxmm0() %{
 6154   constraint(ALLOC_IN_RC(xmm0_reg));
 6155   match(VecX);
 6156   format%{%}
 6157   interface(REG_INTER);
 6158 %}
 6159 
 6160 // Vectors
 6161 
 6162 // Dummy generic vector class. Should be used for all vector operands.
 6163 // Replaced with vec[SDXYZ] during post-selection pass.
      // Matches every vector size (VecS/VecD/VecX/VecY/VecZ); its register class
      // constraint is the placeholder "dynamic".
 6164 operand vec() %{
 6165   constraint(ALLOC_IN_RC(dynamic));
 6166   match(VecX);
 6167   match(VecY);
 6168   match(VecZ);
 6169   match(VecS);
 6170   match(VecD);
 6171 
 6172   format %{ %}
 6173   interface(REG_INTER);
 6174 %}
 6175 
 6176 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6177 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6178 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6179 // runtime code generation via reg_class_dynamic.
      // Matches every vector size (VecS/VecD/VecX/VecY/VecZ); its register class
      // constraint is the placeholder "dynamic".
 6180 operand legVec() %{
 6181   constraint(ALLOC_IN_RC(dynamic));
 6182   match(VecX);
 6183   match(VecY);
 6184   match(VecZ);
 6185   match(VecS);
 6186   match(VecD);
 6187 
 6188   format %{ %}
 6189   interface(REG_INTER);
 6190 %}
 6191 
 6192 // Replaces vec during post-selection cleanup. See above.
      // VecS operand allocated in class vectors_reg_vlbwdq.
 6193 operand vecS() %{
 6194   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6195   match(VecS);
 6196 
 6197   format %{ %}
 6198   interface(REG_INTER);
 6199 %}
 6200 
 6201 // Replaces legVec during post-selection cleanup. See above.
      // VecS operand allocated in class vectors_reg_legacy.
 6202 operand legVecS() %{
 6203   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6204   match(VecS);
 6205 
 6206   format %{ %}
 6207   interface(REG_INTER);
 6208 %}
 6209 
 6210 // Replaces vec during post-selection cleanup. See above.
      // VecD operand allocated in class vectord_reg_vlbwdq.
 6211 operand vecD() %{
 6212   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6213   match(VecD);
 6214 
 6215   format %{ %}
 6216   interface(REG_INTER);
 6217 %}
 6218 
 6219 // Replaces legVec during post-selection cleanup. See above.
      // VecD operand allocated in class vectord_reg_legacy.
 6220 operand legVecD() %{
 6221   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6222   match(VecD);
 6223 
 6224   format %{ %}
 6225   interface(REG_INTER);
 6226 %}
 6227 
 6228 // Replaces vec during post-selection cleanup. See above.
      // VecX operand allocated in class vectorx_reg_vlbwdq.
 6229 operand vecX() %{
 6230   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6231   match(VecX);
 6232 
 6233   format %{ %}
 6234   interface(REG_INTER);
 6235 %}
 6236 
 6237 // Replaces legVec during post-selection cleanup. See above.
      // VecX operand allocated in class vectorx_reg_legacy.
 6238 operand legVecX() %{
 6239   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6240   match(VecX);
 6241 
 6242   format %{ %}
 6243   interface(REG_INTER);
 6244 %}
 6245 
 6246 // Replaces vec during post-selection cleanup. See above.
      // VecY operand allocated in class vectory_reg_vlbwdq.
 6247 operand vecY() %{
 6248   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6249   match(VecY);
 6250 
 6251   format %{ %}
 6252   interface(REG_INTER);
 6253 %}
 6254 
 6255 // Replaces legVec during post-selection cleanup. See above.
      // VecY operand allocated in class vectory_reg_legacy.
 6256 operand legVecY() %{
 6257   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6258   match(VecY);
 6259 
 6260   format %{ %}
 6261   interface(REG_INTER);
 6262 %}
 6263 
 6264 // Replaces vec during post-selection cleanup. See above.
      // VecZ operand allocated in class vectorz_reg (unlike the smaller vec*
      // operands, the class name carries no _vlbwdq suffix).
 6265 operand vecZ() %{
 6266   constraint(ALLOC_IN_RC(vectorz_reg));
 6267   match(VecZ);
 6268 
 6269   format %{ %}
 6270   interface(REG_INTER);
 6271 %}
 6272 
 6273 // VecZ operand allocated from vectorz_reg_legacy. Replaces legVec during post-selection cleanup. See above.
 6274 operand legVecZ() %{
 6275   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6276   match(VecZ);
 6277 
 6278   format %{ %}
 6279   interface(REG_INTER);
 6280 %}
 6281 
 6282 //----------OPERAND CLASSES----------------------------------------------------
 6283 // Operand Classes are groups of operands that are used to simplify
 6284 // instruction definitions by not requiring the AD writer to specify separate
 6285 // instructions for every form of operand when the instruction accepts
 6286 // multiple operand types with the same basic encoding and format.  The classic
 6287 // case of this is memory operands.
 6288 // "memory" groups every addressing-mode operand listed below, including the *Narrow variants.
 6289 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6290                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6291                indCompressedOopOffset,
 6292                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6293                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6294                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6295 
 6296 //----------PIPELINE-----------------------------------------------------------
 6297 // Rules which define the behavior of the target architecture's pipeline.
 6298 pipeline %{
 6299 
 6300 //----------ATTRIBUTES---------------------------------------------------------
 6301 attributes %{
 6302   variable_size_instructions;        // Variable size instructions
 6303   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6304   instruction_unit_size = 1;         // Instruction size is counted in units of 1 byte
 6305   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6306   instruction_fetch_units = 1;       // of 16 bytes
 6307 %}
 6308 
 6309 //----------RESOURCES----------------------------------------------------------
 6310 // Resources are the functional units available to the machine
 6311 
 6312 // Generic P2/P3 pipeline
 6313 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6314 // 3 instructions decoded per cycle.
 6315 // 2 load/store ops per cycle (NOTE(review): 3 MS units are declared below - confirm), 1 branch, 1 FPU,
 6316 // 3 ALU op, only ALU0 handles mul instructions.
 6317 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6318            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6319            BR, FPU,
 6320            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6321 
 6322 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6323 // Pipeline Description specifies the stages in the machine's pipeline
 6324 
 6325 // Generic P2/P3 pipeline
 6326 pipe_desc(S0, S1, S2, S3, S4, S5);
 6327 
 6328 //----------PIPELINE CLASSES---------------------------------------------------
 6329 // Pipeline Classes describe the stages in which input and output are
 6330 // referenced by the hardware pipeline.
 6331 
 6332 // Naming convention: ialu or fpu
 6333 // Then: _reg
 6334 // Then: _reg if there is a 2nd register
 6335 // Then: _long if it's a pair of instructions implementing a long
 6336 // Then: _fat if it requires the big decoder
 6337 //   Or: _mem if it requires the big decoder and a memory unit.
 6338 
 6339 // Integer ALU reg operation
 6340 pipe_class ialu_reg(rRegI dst)
 6341 %{
 6342     single_instruction;
 6343     dst    : S4(write);
 6344     dst    : S3(read);
 6345     DECODE : S0;        // any decoder
 6346     ALU    : S3;        // any alu
 6347 %}
 6348 
 6349 // Long ALU reg operation
 6350 pipe_class ialu_reg_long(rRegL dst)
 6351 %{
 6352     instruction_count(2);
 6353     dst    : S4(write);
 6354     dst    : S3(read);
 6355     DECODE : S0(2);     // any 2 decoders
 6356     ALU    : S3(2);     // any 2 alus
 6357 %}
 6358 
 6359 // Integer ALU reg operation using big decoder
 6360 pipe_class ialu_reg_fat(rRegI dst)
 6361 %{
 6362     single_instruction;
 6363     dst    : S4(write);
 6364     dst    : S3(read);
 6365     D0     : S0;        // big decoder only
 6366     ALU    : S3;        // any alu
 6367 %}
 6368 
 6369 // Integer ALU reg-reg operation
 6370 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6371 %{
 6372     single_instruction;
 6373     dst    : S4(write);
 6374     src    : S3(read);
 6375     DECODE : S0;        // any decoder
 6376     ALU    : S3;        // any alu
 6377 %}
 6378 
 6379 // Integer ALU operation using big decoder; src is declared as a memory operand, no MEM unit is reserved
 6380 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6381 %{
 6382     single_instruction;
 6383     dst    : S4(write);
 6384     src    : S3(read);
 6385     D0     : S0;        // big decoder only
 6386     ALU    : S3;        // any alu
 6387 %}
 6388 
 6389 // Integer ALU reg-mem operation
 6390 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6391 %{
 6392     single_instruction;
 6393     dst    : S5(write);
 6394     mem    : S3(read);
 6395     D0     : S0;        // big decoder only
 6396     ALU    : S4;        // any alu
 6397     MEM    : S3;        // any mem
 6398 %}
 6399 
 6400 // Integer mem operation (prefetch)
 6401 pipe_class ialu_mem(memory mem)
 6402 %{
 6403     single_instruction;
 6404     mem    : S3(read);
 6405     D0     : S0;        // big decoder only
 6406     MEM    : S3;        // any mem
 6407 %}
 6408 
 6409 // Integer Store to Memory
 6410 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6411 %{
 6412     single_instruction;
 6413     mem    : S3(read);
 6414     src    : S5(read);
 6415     D0     : S0;        // big decoder only
 6416     ALU    : S4;        // any alu
 6417     MEM    : S3;        // any mem
 6418 %}
 6419 
 6420 // // Long Store to Memory
 6421 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6422 // %{
 6423 //     instruction_count(2);
 6424 //     mem    : S3(read);
 6425 //     src    : S5(read);
 6426 //     D0     : S0(2);          // big decoder only; twice
 6427 //     ALU    : S4(2);     // any 2 alus
 6428 //     MEM    : S3(2);  // Both mems
 6429 // %}
 6430 
 6431 // Integer Store to Memory with no register source (only the memory operand is tracked)
 6432 pipe_class ialu_mem_imm(memory mem)
 6433 %{
 6434     single_instruction;
 6435     mem    : S3(read);
 6436     D0     : S0;        // big decoder only
 6437     ALU    : S4;        // any alu
 6438     MEM    : S3;        // any mem
 6439 %}
 6440 
 6441 // Integer ALU0 reg-reg operation
 6442 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6443 %{
 6444     single_instruction;
 6445     dst    : S4(write);
 6446     src    : S3(read);
 6447     D0     : S0;        // Big decoder only
 6448     ALU0   : S3;        // only alu0
 6449 %}
 6450 
 6451 // Integer ALU0 reg-mem operation
 6452 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6453 %{
 6454     single_instruction;
 6455     dst    : S5(write);
 6456     mem    : S3(read);
 6457     D0     : S0;        // big decoder only
 6458     ALU0   : S4;        // ALU0 only
 6459     MEM    : S3;        // any mem
 6460 %}
 6461 
 6462 // Integer ALU reg-reg operation that writes the flags register
 6463 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6464 %{
 6465     single_instruction;
 6466     cr     : S4(write);
 6467     src1   : S3(read);
 6468     src2   : S3(read);
 6469     DECODE : S0;        // any decoder
 6470     ALU    : S3;        // any alu
 6471 %}
 6472 
 6473 // Integer ALU reg-imm operation that writes the flags register
 6474 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6475 %{
 6476     single_instruction;
 6477     cr     : S4(write);
 6478     src1   : S3(read);
 6479     DECODE : S0;        // any decoder
 6480     ALU    : S3;        // any alu
 6481 %}
 6482 
 6483 // Integer ALU reg-mem operation that writes the flags register
 6484 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6485 %{
 6486     single_instruction;
 6487     cr     : S4(write);
 6488     src1   : S3(read);
 6489     src2   : S3(read);
 6490     D0     : S0;        // big decoder only
 6491     ALU    : S4;        // any alu
 6492     MEM    : S3;        // any mem
 6493 %}
 6494 
 6495 // cmplt idiom: 4 instructions reading the registers p, q and y
 6496 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6497 %{
 6498     instruction_count(4);
 6499     y      : S4(read);
 6500     q      : S3(read);
 6501     p      : S3(read);
 6502     DECODE : S0(4);     // any decoder
 6503 %}
 6504 
 6505 // Conditional move reg-reg
 6506 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6507 %{
 6508     single_instruction;
 6509     dst    : S4(write);
 6510     src    : S3(read);
 6511     cr     : S3(read);
 6512     DECODE : S0;        // any decoder
 6513 %}
 6514 
 6515 // Conditional move reg-mem
 6516 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6517 %{
 6518     single_instruction;
 6519     dst    : S4(write);
 6520     src    : S3(read);
 6521     cr     : S3(read);
 6522     DECODE : S0;        // any decoder
 6523     MEM    : S3;        // any mem
 6524 %}
 6525 
 6526 // Conditional move reg-reg long
 6527 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6528 %{
 6529     single_instruction;
 6530     dst    : S4(write);
 6531     src    : S3(read);
 6532     cr     : S3(read);
 6533     DECODE : S0(2);     // any 2 decoders
 6534 %}
 6535 
 6536 // Float reg operation (dst is only read)
 6537 pipe_class fpu_reg(regD dst)
 6538 %{
 6539     instruction_count(2);
 6540     dst    : S3(read);
 6541     DECODE : S0(2);     // any 2 decoders
 6542     FPU    : S3;
 6543 %}
 6544 
 6545 // Float reg-reg operation
 6546 pipe_class fpu_reg_reg(regD dst, regD src)
 6547 %{
 6548     instruction_count(2);
 6549     dst    : S4(write);
 6550     src    : S3(read);
 6551     DECODE : S0(2);     // any 2 decoders
 6552     FPU    : S3;
 6553 %}
 6554 
 6555 // Float reg-reg-reg operation
 6556 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6557 %{
 6558     instruction_count(3);
 6559     dst    : S4(write);
 6560     src1   : S3(read);
 6561     src2   : S3(read);
 6562     DECODE : S0(3);     // any 3 decoders
 6563     FPU    : S3(2);
 6564 %}
 6565 
 6566 // Float reg-reg-reg-reg operation
 6567 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6568 %{
 6569     instruction_count(4);
 6570     dst    : S4(write);
 6571     src1   : S3(read);
 6572     src2   : S3(read);
 6573     src3   : S3(read);
 6574     DECODE : S0(4);     // any decoder, count 4
 6575     FPU    : S3(2);
 6576 %}
 6577 
 6578 // Float reg-mem-reg-reg operation
 6579 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6580 %{
 6581     instruction_count(4);
 6582     dst    : S4(write);
 6583     src1   : S3(read);
 6584     src2   : S3(read);
 6585     src3   : S3(read);
 6586     DECODE : S1(3);     // any 3 decoders
 6587     D0     : S0;        // Big decoder only
 6588     FPU    : S3(2);
 6589     MEM    : S3;        // any mem
 6590 %}
 6591 
 6592 // Float reg-mem operation
 6593 pipe_class fpu_reg_mem(regD dst, memory mem)
 6594 %{
 6595     instruction_count(2);
 6596     dst    : S5(write);
 6597     mem    : S3(read);
 6598     D0     : S0;        // big decoder only
 6599     DECODE : S1;        // any decoder for FPU POP
 6600     FPU    : S4;
 6601     MEM    : S3;        // any mem
 6602 %}
 6603 
 6604 // Float reg-reg-mem operation
 6605 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6606 %{
 6607     instruction_count(3);
 6608     dst    : S5(write);
 6609     src1   : S3(read);
 6610     mem    : S3(read);
 6611     D0     : S0;        // big decoder only
 6612     DECODE : S1(2);     // any decoder for FPU POP
 6613     FPU    : S4;
 6614     MEM    : S3;        // any mem
 6615 %}
 6616 
 6617 // Float mem-reg operation
 6618 pipe_class fpu_mem_reg(memory mem, regD src)
 6619 %{
 6620     instruction_count(2);
 6621     src    : S5(read);
 6622     mem    : S3(read);
 6623     DECODE : S0;        // any decoder for FPU PUSH
 6624     D0     : S1;        // big decoder only
 6625     FPU    : S4;
 6626     MEM    : S3;        // any mem
 6627 %}
 6628 // Float mem-reg-reg operation
 6629 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6630 %{
 6631     instruction_count(3);
 6632     src1   : S3(read);
 6633     src2   : S3(read);
 6634     mem    : S3(read);
 6635     DECODE : S0(2);     // any decoder for FPU PUSH
 6636     D0     : S1;        // big decoder only
 6637     FPU    : S4;
 6638     MEM    : S3;        // any mem
 6639 %}
 6640 // Float mem-reg-mem operation
 6641 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6642 %{
 6643     instruction_count(3);
 6644     src1   : S3(read);
 6645     src2   : S3(read);
 6646     mem    : S4(read);
 6647     DECODE : S0;        // any decoder for FPU PUSH
 6648     D0     : S0(2);     // big decoder only
 6649     FPU    : S4;
 6650     MEM    : S3(2);     // any mem
 6651 %}
 6652 // Float mem-mem operation (no FPU unit reserved)
 6653 pipe_class fpu_mem_mem(memory dst, memory src1)
 6654 %{
 6655     instruction_count(2);
 6656     src1   : S3(read);
 6657     dst    : S4(read);
 6658     D0     : S0(2);     // big decoder only
 6659     MEM    : S3(2);     // any mem
 6660 %}
 6661 // Float mem-mem-mem operation
 6662 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6663 %{
 6664     instruction_count(3);
 6665     src1   : S3(read);
 6666     src2   : S3(read);
 6667     dst    : S4(read);
 6668     D0     : S0(3);     // big decoder only
 6669     FPU    : S4;
 6670     MEM    : S3(3);     // any mem
 6671 %}
 6672 // Float mem-reg operation with a constant
 6673 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6674 %{
 6675     instruction_count(3);
 6676     src1   : S4(read);
 6677     mem    : S4(read);
 6678     DECODE : S0;        // any decoder for FPU PUSH
 6679     D0     : S0(2);     // big decoder only
 6680     FPU    : S4;
 6681     MEM    : S3(2);     // any mem
 6682 %}
 6683 
 6684 // Float load constant
 6685 pipe_class fpu_reg_con(regD dst)
 6686 %{
 6687     instruction_count(2);
 6688     dst    : S5(write);
 6689     D0     : S0;        // big decoder only for the load
 6690     DECODE : S1;        // any decoder for FPU POP
 6691     FPU    : S4;
 6692     MEM    : S3;        // any mem
 6693 %}
 6694 
 6695 // Float load constant with an additional register source
 6696 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6697 %{
 6698     instruction_count(3);
 6699     dst    : S5(write);
 6700     src    : S3(read);
 6701     D0     : S0;        // big decoder only for the load
 6702     DECODE : S1(2);     // any decoder for FPU POP
 6703     FPU    : S4;
 6704     MEM    : S3;        // any mem
 6705 %}
 6706 
 6707 // UnConditional branch
 6708 pipe_class pipe_jmp(label labl)
 6709 %{
 6710     single_instruction;
 6711     BR   : S3;
 6712 %}
 6713 
 6714 // Conditional branch (reads the flags register)
 6715 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6716 %{
 6717     single_instruction;
 6718     cr    : S1(read);
 6719     BR    : S3;
 6720 %}
 6721 
 6722 // Allocation idiom (serializing, fixed latency of 6)
 6723 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6724 %{
 6725     instruction_count(1); force_serialization;
 6726     fixed_latency(6);
 6727     heap_ptr : S3(read);
 6728     DECODE   : S0(3);
 6729     D0       : S2;
 6730     MEM      : S3;
 6731     ALU      : S3(2);
 6732     dst      : S5(write);
 6733     BR       : S5;
 6734 %}
 6735 
 6736 // Generic big/slow expanded idiom (10 instructions, multiple bundles, fixed latency of 100)
 6737 pipe_class pipe_slow()
 6738 %{
 6739     instruction_count(10); multiple_bundles; force_serialization;
 6740     fixed_latency(100);
 6741     D0  : S0(2);
 6742     MEM : S3(2);
 6743 %}
 6744 
 6745 // The real do-nothing guy
 6746 pipe_class empty()
 6747 %{
 6748     instruction_count(0);
 6749 %}
 6750 
 6751 // Define the class for the Nop node
 6752 define
 6753 %{
 6754    MachNop = empty;
 6755 %}
 6756 
 6757 %}
 6758 
 6759 //----------INSTRUCTIONS-------------------------------------------------------
 6760 //
 6761 // match      -- States which machine-independent subtree may be replaced
 6762 //               by this instruction.
 6763 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6764 //               selection to identify a minimum cost tree of machine
 6765 //               instructions that matches a tree of machine-independent
 6766 //               instructions.
 6767 // format     -- A string providing the disassembly for this instruction.
 6768 //               The value of an instruction's operand may be inserted
 6769 //               by referring to it with a '$' prefix.
 6770 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6771 //               to within an encode class as $primary, $secondary, and $tertiary
 6772 //               respectively.  The primary opcode is commonly used to
 6773 //               indicate the type of machine instruction, while secondary
 6774 //               and tertiary are often used for prefix options or addressing
 6775 //               modes.
 6776 // ins_encode -- A list of encode classes with parameters. The encode class
 6777 //               name must have been defined in an 'enc_class' specification
 6778 //               in the encode section of the architecture description.
 6779 
 6780 // ============================================================================
 6781 // Halt node: emits a stop carrying the halt reason, but only when the node is reachable.
 6782 instruct ShouldNotReachHere() %{
 6783   match(Halt);
 6784   format %{ "stop\t# ShouldNotReachHere" %}
 6785   ins_encode %{
 6786     // Nothing is emitted when is_reachable() is false.
 6787     if (is_reachable()) {
 6788       __ stop(__ code_string(_halt_reason));
 6789     }
 6790   %}
 6791   ins_pipe(pipe_slow);
 6792 %}
 6793 
 6794 // ============================================================================
 6795 
 6796 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6797 // Dummy float move regF -> vlRegF; its encoding is just ShouldNotReachHere().
 6798 instruct MoveF2VL(vlRegF dst, regF src) %{
 6799   match(Set dst src);
 6800   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6801   ins_encode %{
 6802     ShouldNotReachHere();
 6803   %}
 6804   ins_pipe( fpu_reg_reg );
 6805 %}
 6806 
 6807 // Dummy float move regF -> legRegF; its encoding is just ShouldNotReachHere().
 6808 instruct MoveF2LEG(legRegF dst, regF src) %{
 6809   match(Set dst src);
 6810   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6811   ins_encode %{
 6812     ShouldNotReachHere();
 6813   %}
 6814   ins_pipe( fpu_reg_reg );
 6815 %}
 6816 
 6817 // Dummy float move vlRegF -> regF; its encoding is just ShouldNotReachHere().
 6818 instruct MoveVL2F(regF dst, vlRegF src) %{
 6819   match(Set dst src);
 6820   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6821   ins_encode %{
 6822     ShouldNotReachHere();
 6823   %}
 6824   ins_pipe( fpu_reg_reg );
 6825 %}
 6826 
 6827 // Dummy float move legRegF -> regF; its encoding is just ShouldNotReachHere().
 6828 instruct MoveLEG2F(regF dst, legRegF src) %{
 6829   match(Set dst src);
 6830   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6831   ins_encode %{
 6832     ShouldNotReachHere();
 6833   %}
 6834   ins_pipe( fpu_reg_reg );
 6835 %}
 6836 
 6837 // Dummy double move regD -> vlRegD; its encoding is just ShouldNotReachHere().
 6838 instruct MoveD2VL(vlRegD dst, regD src) %{
 6839   match(Set dst src);
 6840   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6841   ins_encode %{
 6842     ShouldNotReachHere();
 6843   %}
 6844   ins_pipe( fpu_reg_reg );
 6845 %}
 6846 
 6847 // Dummy double move regD -> legRegD; its encoding is just ShouldNotReachHere().
 6848 instruct MoveD2LEG(legRegD dst, regD src) %{
 6849   match(Set dst src);
 6850   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6851   ins_encode %{
 6852     ShouldNotReachHere();
 6853   %}
 6854   ins_pipe( fpu_reg_reg );
 6855 %}
 6856 
 6857 // Dummy double move vlRegD -> regD; its encoding is just ShouldNotReachHere().
 6858 instruct MoveVL2D(regD dst, vlRegD src) %{
 6859   match(Set dst src);
 6860   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6861   ins_encode %{
 6862     ShouldNotReachHere();
 6863   %}
 6864   ins_pipe( fpu_reg_reg );
 6865 %}
 6866 
 6867 // Dummy double move legRegD -> regD; its encoding is just ShouldNotReachHere().
 6868 instruct MoveLEG2D(regD dst, legRegD src) %{
 6869   match(Set dst src);
 6870   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6871   ins_encode %{
 6872     ShouldNotReachHere();
 6873   %}
 6874   ins_pipe( fpu_reg_reg );
 6875 %}
 6876 
 6877 //----------Load/Store/Move Instructions---------------------------------------
 6878 //----------Load Instructions--------------------------------------------------
 6879 
 6880 // Load Byte (8 bit signed): matches LoadB and emits movsbl from memory into an int register
 6881 instruct loadB(rRegI dst, memory mem)
 6882 %{
 6883   match(Set dst (LoadB mem));
 6884 
 6885   ins_cost(125);
 6886   format %{ "movsbl  $dst, $mem\t# byte" %}
 6887 
 6888   ins_encode %{
 6889     __ movsbl($dst$$Register, $mem$$Address);
 6890   %}
 6891 
 6892   ins_pipe(ialu_reg_mem);
 6893 %}
 6894 
 6895 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into one movsbq
 6896 instruct loadB2L(rRegL dst, memory mem)
 6897 %{
 6898   match(Set dst (ConvI2L (LoadB mem)));
 6899 
 6900   ins_cost(125);
 6901   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6902 
 6903   ins_encode %{
 6904     __ movsbq($dst$$Register, $mem$$Address);
 6905   %}
 6906 
 6907   ins_pipe(ialu_reg_mem);
 6908 %}
 6909 
 6910 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB and emits movzbl into an int register
 6911 instruct loadUB(rRegI dst, memory mem)
 6912 %{
 6913   match(Set dst (LoadUB mem));
 6914 
 6915   ins_cost(125);
 6916   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6917 
 6918   ins_encode %{
 6919     __ movzbl($dst$$Register, $mem$$Address);
 6920   %}
 6921 
 6922   ins_pipe(ialu_reg_mem);
 6923 %}
 6924 
 6925 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into one movzbq
 6926 instruct loadUB2L(rRegL dst, memory mem)
 6927 %{
 6928   match(Set dst (ConvI2L (LoadUB mem)));
 6929 
 6930   ins_cost(125);
 6931   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6932 
 6933   ins_encode %{
 6934     __ movzbq($dst$$Register, $mem$$Address);
 6935   %}
 6936 
 6937   ins_pipe(ialu_reg_mem);
 6938 %}
 6939 
 6940 // Unsigned byte load AND-ed with a 32-bit immediate mask, result in a Long Register (kills flags)
 6941 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6942   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6943   effect(KILL cr);
 6944 
 6945   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6946             "andl    $dst, right_n_bits($mask, 8)" %}
 6947   ins_encode %{
 6948     // Load first, then AND with the mask restricted by right_n_bits(8).
 6949     __ movzbq($dst$$Register, $mem$$Address);
 6950     __ andl($dst$$Register, $mask$$constant & right_n_bits(8));
 6951   %}
 6952   ins_pipe(ialu_reg_mem);
 6953 %}
 6954 
 6955 // Load Short (16 bit signed): matches LoadS and emits movswl into an int register
 6956 instruct loadS(rRegI dst, memory mem)
 6957 %{
 6958   match(Set dst (LoadS mem));
 6959 
 6960   ins_cost(125);
 6961   format %{ "movswl $dst, $mem\t# short" %}
 6962 
 6963   ins_encode %{
 6964     __ movswl($dst$$Register, $mem$$Address);
 6965   %}
 6966 
 6967   ins_pipe(ialu_reg_mem);
 6968 %}
 6969 
 6970 // Load Short (16 bit signed) to Byte (8 bit signed): folds the LShiftI/RShiftI by immI_24 pair around LoadS into one movsbl
 6971 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6972   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6973 
 6974   ins_cost(125);
 6975   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6976   ins_encode %{
 6977     __ movsbl($dst$$Register, $mem$$Address);
 6978   %}
 6979   ins_pipe(ialu_reg_mem);
 6980 %}
 6981 
 6982 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into one movswq
 6983 instruct loadS2L(rRegL dst, memory mem)
 6984 %{
 6985   match(Set dst (ConvI2L (LoadS mem)));
 6986 
 6987   ins_cost(125);
 6988   format %{ "movswq $dst, $mem\t# short -> long" %}
 6989 
 6990   ins_encode %{
 6991     __ movswq($dst$$Register, $mem$$Address);
 6992   %}
 6993 
 6994   ins_pipe(ialu_reg_mem);
 6995 %}
 6996 
 6997 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS and emits movzwl into an int register
 6998 instruct loadUS(rRegI dst, memory mem)
 6999 %{
 7000   match(Set dst (LoadUS mem));
 7001 
 7002   ins_cost(125);
 7003   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7004 
 7005   ins_encode %{
 7006     __ movzwl($dst$$Register, $mem$$Address);
 7007   %}
 7008 
 7009   ins_pipe(ialu_reg_mem);
 7010 %}
 7011 
 7012 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): folds the LShiftI/RShiftI by immI_24 pair around LoadUS into one movsbl
 7013 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7014   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7015 
 7016   ins_cost(125);
 7017   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7018   ins_encode %{
 7019     __ movsbl($dst$$Register, $mem$$Address);
 7020   %}
 7021   ins_pipe(ialu_reg_mem);
 7022 %}
 7023 
 7024 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds ConvI2L(LoadUS) into one movzwq
 7025 instruct loadUS2L(rRegL dst, memory mem)
 7026 %{
 7027   match(Set dst (ConvI2L (LoadUS mem)));
 7028 
 7029   ins_cost(125);
 7030   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7031 
 7032   ins_encode %{
 7033     __ movzwq($dst$$Register, $mem$$Address);
 7034   %}
 7035 
 7036   ins_pipe(ialu_reg_mem);
 7037 %}
 7038 
 7039 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: the load and the AndI are replaced by one movzbq
 7040 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7041   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7042 
 7043   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7044   ins_encode %{
 7045     __ movzbq($dst$$Register, $mem$$Address);
 7046   %}
 7047   ins_pipe(ialu_reg_mem);
 7048 %}
 7049 
 7050 // Unsigned short/char load AND-ed with a 32-bit immediate mask, result in a Long Register (kills flags)
 7051 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7052   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7053   effect(KILL cr);
 7054 
 7055   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7056             "andl    $dst, right_n_bits($mask, 16)" %}
 7057   ins_encode %{
 7058     // Load first, then AND with the mask restricted by right_n_bits(16).
 7059     __ movzwq($dst$$Register, $mem$$Address);
 7060     __ andl($dst$$Register, $mask$$constant & right_n_bits(16));
 7061   %}
 7062   ins_pipe(ialu_reg_mem);
 7063 %}
 7064 
 7065 // Load Integer: matches LoadI and emits movl from memory into an int register
 7066 instruct loadI(rRegI dst, memory mem)
 7067 %{
 7068   match(Set dst (LoadI mem));
 7069 
 7070   ins_cost(125);
 7071   format %{ "movl    $dst, $mem\t# int" %}
 7072 
 7073   ins_encode %{
 7074     __ movl($dst$$Register, $mem$$Address);
 7075   %}
 7076 
 7077   ins_pipe(ialu_reg_mem);
 7078 %}
 7079 
 7080 // Load Integer (32 bit signed) to Byte (8 bit signed): folds the LShiftI/RShiftI by immI_24 pair around LoadI into one movsbl
 7081 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7082   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7083 
 7084   ins_cost(125);
 7085   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7086   ins_encode %{
 7087     __ movsbl($dst$$Register, $mem$$Address);
 7088   %}
 7089   ins_pipe(ialu_reg_mem);
 7090 %}
 7091 
 7092 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): AndI of a LoadI with an immI_255 mask becomes one movzbl
 7093 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7094   match(Set dst (AndI (LoadI mem) mask));
 7095 
 7096   ins_cost(125);
 7097   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7098   ins_encode %{
 7099     __ movzbl($dst$$Register, $mem$$Address);
 7100   %}
 7101   ins_pipe(ialu_reg_mem);
 7102 %}
 7103 
 7104 // Load Integer (32 bit signed) to Short (16 bit signed): folds the LShiftI/RShiftI by immI_16 pair around LoadI into one movswl
 7105 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7106   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7107 
 7108   ins_cost(125);
 7109   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7110   ins_encode %{
 7111     __ movswl($dst$$Register, $mem$$Address);
 7112   %}
 7113   ins_pipe(ialu_reg_mem);
 7114 %}
 7115 
 7116 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): AndI of a LoadI with an immI_65535 mask becomes one movzwl
 7117 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7118   match(Set dst (AndI (LoadI mem) mask));
 7119 
 7120   ins_cost(125);
 7121   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7122   ins_encode %{
 7123     __ movzwl($dst$$Register, $mem$$Address);
 7124   %}
 7125   ins_pipe(ialu_reg_mem);
 7126 %}
 7127 
 7128 // Load Integer into Long Register: folds ConvI2L(LoadI) into one movslq
 7129 instruct loadI2L(rRegL dst, memory mem)
 7130 %{
 7131   match(Set dst (ConvI2L (LoadI mem)));
 7132 
 7133   ins_cost(125);
 7134   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7135 
 7136   ins_encode %{
 7137     __ movslq($dst$$Register, $mem$$Address);
 7138   %}
 7139 
 7140   ins_pipe(ialu_reg_mem);
 7141 %}
 7142 
 7143 // Load Integer with mask 0xFF into Long Register: the load and the AndI are replaced by one movzbq
 7144 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7145   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7146 
 7147   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7148   ins_encode %{
 7149     __ movzbq($dst$$Register, $mem$$Address);
 7150   %}
 7151   ins_pipe(ialu_reg_mem);
 7152 %}
 7153 
 7154 // Load Integer with mask 0xFFFF into Long Register: the load and the AndI are replaced by one movzwq
 7155 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7156   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7157 
 7158   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7159   ins_encode %{
 7160     __ movzwq($dst$$Register, $mem$$Address);
 7161   %}
 7162   ins_pipe(ialu_reg_mem);
 7163 %}
 7164 
 7165 // Integer load AND-ed with a 31-bit immediate mask, result in a Long Register (kills flags)
 7166 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7167   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7168   effect(KILL cr);
 7169 
 7170   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7171             "andl    $dst, $mask" %}
 7172   ins_encode %{
 7173     // 32-bit load followed by a 32-bit AND with the immediate mask.
 7174     __ movl($dst$$Register, $mem$$Address);
 7175     __ andl($dst$$Register, $mask$$constant);
 7176   %}
 7177   ins_pipe(ialu_reg_mem);
 7178 %}
 7179 
 7180 // Load Unsigned Integer into Long Register: AndL of ConvI2L(LoadI) with an immL_32bits mask becomes one movl
 7181 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7182 %{
 7183   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7184 
 7185   ins_cost(125);
 7186   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7187 
 7188   ins_encode %{
 7189     __ movl($dst$$Register, $mem$$Address);
 7190   %}
 7191 
 7192   ins_pipe(ialu_reg_mem);
 7193 %}
 7194 
 7195 // Load Long: matches LoadL and emits movq from memory into a long register
 7196 instruct loadL(rRegL dst, memory mem)
 7197 %{
 7198   match(Set dst (LoadL mem));
 7199 
 7200   ins_cost(125);
 7201   format %{ "movq    $dst, $mem\t# long" %}
 7202 
 7203   ins_encode %{
 7204     __ movq($dst$$Register, $mem$$Address);
 7205   %}
 7206 
 7207   ins_pipe(ialu_reg_mem); // XXX
 7208 %}
 7209 
 7210 // Load Range: matches LoadRange and emits movl from memory into an int register
 7211 instruct loadRange(rRegI dst, memory mem)
 7212 %{
 7213   match(Set dst (LoadRange mem));
 7214 
 7215   ins_cost(125); // XXX
 7216   format %{ "movl    $dst, $mem\t# range" %}
 7217   ins_encode %{
 7218     __ movl($dst$$Register, $mem$$Address);
 7219   %}
 7220   ins_pipe(ialu_reg_mem);
 7221 %}
 7222 
 7223 // Load Pointer: plain movq, selected only when the load's barrier_data() is 0
 7224 instruct loadP(rRegP dst, memory mem)
 7225 %{
 7226   match(Set dst (LoadP mem));
 7227   predicate(n->as_Load()->barrier_data() == 0);
 7228 
 7229   ins_cost(125); // XXX
 7230   format %{ "movq    $dst, $mem\t# ptr" %}
 7231   ins_encode %{
 7232     __ movq($dst$$Register, $mem$$Address);
 7233   %}
 7234   ins_pipe(ialu_reg_mem); // XXX
 7235 %}
 7236 
 7237 // Load Compressed Pointer: plain movl, selected only when the load's barrier_data() is 0
 7238 instruct loadN(rRegN dst, memory mem)
 7239 %{
 7240    predicate(n->as_Load()->barrier_data() == 0);
 7241    match(Set dst (LoadN mem));
 7242 
 7243    ins_cost(125); // XXX
 7244    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7245    ins_encode %{
 7246      __ movl($dst$$Register, $mem$$Address);
 7247    %}
 7248    ins_pipe(ialu_reg_mem); // XXX
 7249 %}
 7250 
 7251 
 7252 // Load Klass Pointer: matches LoadKlass and emits movq from memory into a pointer register
 7253 instruct loadKlass(rRegP dst, memory mem)
 7254 %{
 7255   match(Set dst (LoadKlass mem));
 7256 
 7257   ins_cost(125); // XXX
 7258   format %{ "movq    $dst, $mem\t# class" %}
 7259   ins_encode %{
 7260     __ movq($dst$$Register, $mem$$Address);
 7261   %}
 7262   ins_pipe(ialu_reg_mem); // XXX
 7263 %}
 7264 
 7265 // Load narrow Klass Pointer: plain movl, selected only when UseCompactObjectHeaders is off
 7266 instruct loadNKlass(rRegN dst, memory mem)
 7267 %{
 7268   predicate(!UseCompactObjectHeaders);
 7269   match(Set dst (LoadNKlass mem));
 7270 
 7271   ins_cost(125); // XXX
 7272   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7273   ins_encode %{
 7274     __ movl($dst$$Register, $mem$$Address);
 7275   %}
 7276   ins_pipe(ialu_reg_mem); // XXX
 7277 %}
 7278 
 7279 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7280 %{
 7281   predicate(UseCompactObjectHeaders);  // LoadNKlass variant for compact object headers: load, then shift right
 7282   match(Set dst (LoadNKlass mem));
 7283   effect(KILL cr);  // flags are declared clobbered because a shift instruction is emitted
 7284   ins_cost(125);
 7285   format %{
 7286     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7287     "shrl    $dst, markWord::klass_shift_at_offset"
 7288   %}
 7289   ins_encode %{
 7290     if (UseAPX) {
 7291       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);  // APX path: one eshrl taking the memory operand directly
 7292     }
 7293     else {
 7294       __ movl($dst$$Register, $mem$$Address);  // non-APX path: load first ...
 7295       __ shrl($dst$$Register, markWord::klass_shift_at_offset);  // ... then shift by markWord::klass_shift_at_offset
 7296     }
 7297   %}
 7298   ins_pipe(ialu_reg_mem);
 7299 %}
 7300 
 7301 // Load Float: rule for the ideal LoadF node; loads from memory into an XMM register via movflt.
 7302 instruct loadF(regF dst, memory mem)
 7303 %{
 7304   match(Set dst (LoadF mem));
 7305
 7306   ins_cost(145); // XXX
 7307   format %{ "movss   $dst, $mem\t# float" %}
 7308   ins_encode %{
 7309     __ movflt($dst$$XMMRegister, $mem$$Address);  // macro-assembler helper; the format string prints it as movss
 7310   %}
 7311   ins_pipe(pipe_slow); // XXX
 7312 %}
 7313 
 7314 // Load Double: variant chosen when UseXmmLoadAndClearUpper is off; the format string prints movlpd.
 7315 instruct loadD_partial(regD dst, memory mem)
 7316 %{
 7317   predicate(!UseXmmLoadAndClearUpper);  // exact complement of the predicate on loadD
 7318   match(Set dst (LoadD mem));
 7319
 7320   ins_cost(145); // XXX
 7321   format %{ "movlpd  $dst, $mem\t# double" %}
 7322   ins_encode %{
 7323     __ movdbl($dst$$XMMRegister, $mem$$Address);  // same helper call as loadD; only predicate and format text differ
 7324   %}
 7325   ins_pipe(pipe_slow); // XXX
 7326 %}
 7327 
 7328 instruct loadD(regD dst, memory mem)
 7329 %{
 7330   predicate(UseXmmLoadAndClearUpper);  // LoadD variant for UseXmmLoadAndClearUpper on; loadD_partial covers the other case
 7331   match(Set dst (LoadD mem));
 7332
 7333   ins_cost(145); // XXX
 7334   format %{ "movsd   $dst, $mem\t# double" %}
 7335   ins_encode %{
 7336     __ movdbl($dst$$XMMRegister, $mem$$Address);  // macro-assembler helper; the format string prints it as movsd
 7337   %}
 7338   ins_pipe(pipe_slow); // XXX
 7339 %}
 7340 
 7341 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7342 %{
 7343   match(Set dst con);  // materializes an immAOTRuntimeConstantsAddress constant into a pointer register
 7344
 7345   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7346
 7347   ins_encode %{
 7348     __ load_aotrc_address($dst$$Register, (address)$con$$constant);  // constant is passed to the helper as a raw address
 7349   %}
 7350
 7351   ins_pipe(ialu_reg_fat);
 7352 %}
 7353 
 7354 // min = java.lang.Math.min(float a, float b)
 7355 // max = java.lang.Math.max(float a, float b)
 7356 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
 7357 %{
 7358   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));  // AVX10.2 available and node is not a reduction
 7359   match(Set dst (MaxF a b));
 7360   match(Set dst (MinF a b));  // one rule serves both MaxF and MinF
 7361
 7362   format %{ "minmaxF $dst, $a, $b" %}
 7363   ins_encode %{
 7364     int opcode = this->ideal_Opcode();  // tells the helper which of the two match rules fired
 7365     __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);  // k0 is passed as the mask register argument
 7366   %}
 7367   ins_pipe( pipe_slow );
 7368 %}
 7369 
 7370 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr)
 7371 %{
 7372   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));  // AVX10.2 available and node is a reduction
 7373   match(Set dst (MaxF a b));
 7374   match(Set dst (MinF a b));  // one rule serves both MaxF and MinF
 7375   effect(USE a, USE b, TEMP rtmp, KILL cr);  // needs an integer scratch register and declares flags clobbered
 7376
 7377   format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7378   ins_encode %{
 7379     int opcode = this->ideal_Opcode();  // tells us which of the two match rules fired
 7380     bool min = (opcode == Op_MinF);  // the comparison is already a bool; no ternary needed
 7381     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7382                     min, fp_prec_flt /*pt*/);
 7383   %}
 7384   ins_pipe( pipe_slow );
 7385 %}
 7386 
 7387 // min = java.lang.Math.min(float a, float b)
 7388 // max = java.lang.Math.max(float a, float b)
 7389 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
 7390 %{
 7391   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));  // AVX without AVX10.2, non-reduction nodes
 7392   match(Set dst (MaxF a b));
 7393   match(Set dst (MinF a b));  // one rule serves both MaxF and MinF
 7394   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);  // three XMM scratch registers are reserved for the helper
 7395
 7396   format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7397   ins_encode %{
 7398     int opcode = this->ideal_Opcode();
 7399     int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;  // translate the scalar opcode to the vector opcode passed to vminmax_fp
 7400     __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7401                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7402   %}
 7403   ins_pipe( pipe_slow );
 7404 %}
 7405 
 7406 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
 7407 %{
 7408   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));  // AVX without AVX10.2, reduction nodes
 7409   match(Set dst (MaxF a b));
 7410   match(Set dst (MinF a b));  // one rule serves both MaxF and MinF
 7411   effect(USE a, USE b, TEMP rtmp, KILL cr);  // needs an integer scratch register and declares flags clobbered
 7412
 7413   format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
 7414   ins_encode %{
 7415     int opcode = this->ideal_Opcode();  // tells us which of the two match rules fired
 7416     bool min = (opcode == Op_MinF);  // the comparison is already a bool; no ternary needed
 7417     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7418                     min, fp_prec_flt /*pt*/);
 7419   %}
 7420   ins_pipe( pipe_slow );
 7421 %}
 7422 
 7423 // min = java.lang.Math.min(double a, double b)
 7424 // max = java.lang.Math.max(double a, double b)
 7425 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
 7426 %{
 7427   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));  // AVX10.2 available and node is not a reduction
 7428   match(Set dst (MaxD a b));
 7429   match(Set dst (MinD a b));  // one rule serves both MaxD and MinD
 7430
 7431   format %{ "minmaxD $dst, $a, $b" %}
 7432   ins_encode %{
 7433     int opcode = this->ideal_Opcode();  // tells the helper which of the two match rules fired
 7434     __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);  // k0 is passed as the mask register argument
 7435   %}
 7436   ins_pipe( pipe_slow );
 7437 %}
 7438 
 7439 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr)
 7440 %{
 7441   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));  // AVX10.2 available and node is a reduction
 7442   match(Set dst (MaxD a b));
 7443   match(Set dst (MinD a b));  // one rule serves both MaxD and MinD
 7444   effect(USE a, USE b, TEMP rtmp, KILL cr);  // NOTE(review): minmaxD_reduction_reg declares rtmp as rRegL; confirm rRegI is intended here
 7445
 7446   format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7447   ins_encode %{
 7448     int opcode = this->ideal_Opcode();  // tells us which of the two match rules fired
 7449     bool min = (opcode == Op_MinD);  // the comparison is already a bool; no ternary needed
 7450     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7451                     min, fp_prec_dbl /*pt*/);
 7452   %}
 7453   ins_pipe( pipe_slow );
 7454 %}
 7455 
 7456 // min = java.lang.Math.min(double a, double b)
 7457 // max = java.lang.Math.max(double a, double b)
 7458 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
 7459 %{
 7460   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));  // AVX without AVX10.2, non-reduction nodes
 7461   match(Set dst (MaxD a b));
 7462   match(Set dst (MinD a b));  // one rule serves both MaxD and MinD
 7463   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);  // three XMM scratch registers are reserved for the helper
 7464
 7465   format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7466   ins_encode %{
 7467     int opcode = this->ideal_Opcode();
 7468     int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;  // translate the scalar opcode to the vector opcode passed to vminmax_fp
 7469     __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7470                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7471   %}
 7472   ins_pipe( pipe_slow );
 7473 %}
 7474 
 7475 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
 7476 %{
 7477   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));  // AVX without AVX10.2, reduction nodes
 7478   match(Set dst (MaxD a b));
 7479   match(Set dst (MinD a b));  // one rule serves both MaxD and MinD
 7480   effect(USE a, USE b, TEMP rtmp, KILL cr);  // needs a long scratch register and declares flags clobbered
 7481
 7482   format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7483   ins_encode %{
 7484     int opcode = this->ideal_Opcode();  // tells us which of the two match rules fired
 7485     bool min = (opcode == Op_MinD);  // the comparison is already a bool; no ternary needed
 7486     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7487                     min, fp_prec_dbl /*pt*/);
 7488   %}
 7489   ins_pipe( pipe_slow );
 7490 %}
 7491 
 7492 // Load Effective Address: leaq of an addressing-mode operand into a pointer register.
 7493 instruct leaP8(rRegP dst, indOffset8 mem)
 7494 %{
 7495   match(Set dst mem);  // the matched expression is whatever the indOffset8 operand itself describes
 7496
 7497   ins_cost(110); // XXX
 7498   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7499   ins_encode %{
 7500     __ leaq($dst$$Register, $mem$$Address);
 7501   %}
 7502   ins_pipe(ialu_reg_reg_fat);
 7503 %}
 7504 
 7505 instruct leaP32(rRegP dst, indOffset32 mem)
 7506 %{
 7507   match(Set dst mem);  // leaq rule for the indOffset32 addressing operand
 7508
 7509   ins_cost(110);
 7510   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7511   ins_encode %{
 7512     __ leaq($dst$$Register, $mem$$Address);  // address computation only; no memory access is emitted
 7513   %}
 7514   ins_pipe(ialu_reg_reg_fat);
 7515 %}
 7516 
 7517 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7518 %{
 7519   match(Set dst mem);  // leaq rule for the indIndexOffset addressing operand
 7520
 7521   ins_cost(110);
 7522   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7523   ins_encode %{
 7524     __ leaq($dst$$Register, $mem$$Address);  // address computation only; no memory access is emitted
 7525   %}
 7526   ins_pipe(ialu_reg_reg_fat);
 7527 %}
 7528 
 7529 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7530 %{
 7531   match(Set dst mem);  // leaq rule for the indIndexScale addressing operand
 7532
 7533   ins_cost(110);
 7534   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7535   ins_encode %{
 7536     __ leaq($dst$$Register, $mem$$Address);  // address computation only; no memory access is emitted
 7537   %}
 7538   ins_pipe(ialu_reg_reg_fat);
 7539 %}
 7540 
 7541 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7542 %{
 7543   match(Set dst mem);  // leaq rule for the indPosIndexScale addressing operand
 7544
 7545   ins_cost(110);
 7546   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
 7547   ins_encode %{
 7548     __ leaq($dst$$Register, $mem$$Address);  // address computation only; no memory access is emitted
 7549   %}
 7550   ins_pipe(ialu_reg_reg_fat);
 7551 %}
 7552 
 7553 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7554 %{
 7555   match(Set dst mem);  // leaq rule for the indIndexScaleOffset addressing operand
 7556
 7557   ins_cost(110);
 7558   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7559   ins_encode %{
 7560     __ leaq($dst$$Register, $mem$$Address);  // address computation only; no memory access is emitted
 7561   %}
 7562   ins_pipe(ialu_reg_reg_fat);
 7563 %}
 7564 
 7565 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7566 %{
 7567   match(Set dst mem);  // leaq rule for the indPosIndexOffset addressing operand
 7568
 7569   ins_cost(110);
 7570   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7571   ins_encode %{
 7572     __ leaq($dst$$Register, $mem$$Address);  // address computation only; no memory access is emitted
 7573   %}
 7574   ins_pipe(ialu_reg_reg_fat);
 7575 %}
 7576 
 7577 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7578 %{
 7579   match(Set dst mem);  // leaq rule for the indPosIndexScaleOffset addressing operand
 7580
 7581   ins_cost(110);
 7582   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7583   ins_encode %{
 7584     __ leaq($dst$$Register, $mem$$Address);  // address computation only; no memory access is emitted
 7585   %}
 7586   ins_pipe(ialu_reg_reg_fat);
 7587 %}
 7588 
 7589 // Load Effective Address which uses a narrow (32-bit) oop; this first form requires a non-zero compressed-oop shift.
 7590 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7591 %{
 7592   predicate(UseCompressedOops && (CompressedOops::shift() != 0));  // the *Narrow lea rules cover shift() == 0
 7593   match(Set dst mem);
 7594
 7595   ins_cost(110);
 7596   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7597   ins_encode %{
 7598     __ leaq($dst$$Register, $mem$$Address);
 7599   %}
 7600   ins_pipe(ialu_reg_reg_fat);
 7601 %}
 7602 
 7603 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7604 %{
 7605   predicate(CompressedOops::shift() == 0);  // narrow-oop lea form, valid only with a zero compressed-oop shift
 7606   match(Set dst mem);  // leaq rule for the indOffset8Narrow addressing operand
 7607
 7608   ins_cost(110); // XXX
 7609   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7610   ins_encode %{
 7611     __ leaq($dst$$Register, $mem$$Address);
 7612   %}
 7613   ins_pipe(ialu_reg_reg_fat);
 7614 %}
 7615 
 7616 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7617 %{
 7618   predicate(CompressedOops::shift() == 0);  // narrow-oop lea form, valid only with a zero compressed-oop shift
 7619   match(Set dst mem);  // leaq rule for the indOffset32Narrow addressing operand
 7620
 7621   ins_cost(110);
 7622   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7623   ins_encode %{
 7624     __ leaq($dst$$Register, $mem$$Address);
 7625   %}
 7626   ins_pipe(ialu_reg_reg_fat);
 7627 %}
 7628 
 7629 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7630 %{
 7631   predicate(CompressedOops::shift() == 0);  // narrow-oop lea form, valid only with a zero compressed-oop shift
 7632   match(Set dst mem);  // leaq rule for the indIndexOffsetNarrow addressing operand
 7633
 7634   ins_cost(110);
 7635   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7636   ins_encode %{
 7637     __ leaq($dst$$Register, $mem$$Address);
 7638   %}
 7639   ins_pipe(ialu_reg_reg_fat);
 7640 %}
 7641 
 7642 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7643 %{
 7644   predicate(CompressedOops::shift() == 0);  // narrow-oop lea form, valid only with a zero compressed-oop shift
 7645   match(Set dst mem);  // leaq rule for the indIndexScaleNarrow addressing operand
 7646
 7647   ins_cost(110);
 7648   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7649   ins_encode %{
 7650     __ leaq($dst$$Register, $mem$$Address);
 7651   %}
 7652   ins_pipe(ialu_reg_reg_fat);
 7653 %}
 7654 
 7655 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7656 %{
 7657   predicate(CompressedOops::shift() == 0);  // narrow-oop lea form, valid only with a zero compressed-oop shift
 7658   match(Set dst mem);  // leaq rule for the indIndexScaleOffsetNarrow addressing operand
 7659
 7660   ins_cost(110);
 7661   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7662   ins_encode %{
 7663     __ leaq($dst$$Register, $mem$$Address);
 7664   %}
 7665   ins_pipe(ialu_reg_reg_fat);
 7666 %}
 7667 
 7668 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7669 %{
 7670   predicate(CompressedOops::shift() == 0);  // narrow-oop lea form, valid only with a zero compressed-oop shift
 7671   match(Set dst mem);  // leaq rule for the indPosIndexOffsetNarrow addressing operand
 7672
 7673   ins_cost(110);
 7674   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7675   ins_encode %{
 7676     __ leaq($dst$$Register, $mem$$Address);
 7677   %}
 7678   ins_pipe(ialu_reg_reg_fat);
 7679 %}
 7680 
 7681 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7682 %{
 7683   predicate(CompressedOops::shift() == 0);  // narrow-oop lea form, valid only with a zero compressed-oop shift
 7684   match(Set dst mem);  // leaq rule for the indPosIndexScaleOffsetNarrow addressing operand
 7685
 7686   ins_cost(110);
 7687   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7688   ins_encode %{
 7689     __ leaq($dst$$Register, $mem$$Address);
 7690   %}
 7691   ins_pipe(ialu_reg_reg_fat);
 7692 %}
 7693 
 7694 instruct loadConI(rRegI dst, immI src)
 7695 %{
 7696   match(Set dst src);  // general int constant; no ins_cost is given, unlike loadConI0 (cost 50)
 7697
 7698   format %{ "movl    $dst, $src\t# int" %}
 7699   ins_encode %{
 7700     __ movl($dst$$Register, $src$$constant);  // move-immediate; flags are not listed as killed
 7701   %}
 7702   ins_pipe(ialu_reg_fat); // XXX
 7703 %}
 7704 
 7705 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7706 %{
 7707   match(Set dst src);  // int constant zero, produced by xor-ing the register with itself
 7708   effect(KILL cr);  // the xor clobbers the flags register
 7709
 7710   ins_cost(50);
 7711   format %{ "xorl    $dst, $dst\t# int" %}
 7712   ins_encode %{
 7713     __ xorl($dst$$Register, $dst$$Register);
 7714   %}
 7715   ins_pipe(ialu_reg);
 7716 %}
 7717 
 7718 instruct loadConL(rRegL dst, immL src)
 7719 %{
 7720   match(Set dst src);  // general long constant
 7721
 7722   ins_cost(150);  // highest cost of the long-constant rules visible here (loadConL0 50, loadConUL32 60, loadConL32 70)
 7723   format %{ "movq    $dst, $src\t# long" %}
 7724   ins_encode %{
 7725     __ mov64($dst$$Register, $src$$constant);  // emitted through mov64 although the format prints movq
 7726   %}
 7727   ins_pipe(ialu_reg);
 7728 %}
 7729 
 7730 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7731 %{
 7732   match(Set dst src);  // long constant zero, produced by xor-ing the register with itself
 7733   effect(KILL cr);  // the xor clobbers the flags register
 7734
 7735   ins_cost(50);
 7736   format %{ "xorl    $dst, $dst\t# long" %}
 7737   ins_encode %{
 7738     __ xorl($dst$$Register, $dst$$Register);  // xorl (not xorq) is used for the long register
 7739   %}
 7740   ins_pipe(ialu_reg); // XXX
 7741 %}
 7742 
 7743 instruct loadConUL32(rRegL dst, immUL32 src)
 7744 %{
 7745   match(Set dst src);  // long constant described by the immUL32 operand (format: unsigned 32-bit)
 7746
 7747   ins_cost(60);
 7748   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7749   ins_encode %{
 7750     __ movl($dst$$Register, $src$$constant);  // movl into a long register
 7751   %}
 7752   ins_pipe(ialu_reg);
 7753 %}
 7754 
 7755 instruct loadConL32(rRegL dst, immL32 src)
 7756 %{
 7757   match(Set dst src);  // long constant described by the immL32 operand (format: 32-bit)
 7758
 7759   ins_cost(70);
 7760   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7761   ins_encode %{
 7762     __ movq($dst$$Register, $src$$constant);  // movq with an immediate, as opposed to the mov64 used by loadConL
 7763   %}
 7764   ins_pipe(ialu_reg);
 7765 %}
 7766 
 7767 instruct loadConP(rRegP dst, immP con) %{
 7768   match(Set dst con);  // general pointer constant
 7769
 7770   format %{ "movq    $dst, $con\t# ptr" %}
 7771   ins_encode %{
 7772     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);  // immediate is emitted with the constant's own relocation type
 7773   %}
 7774   ins_pipe(ialu_reg_fat); // XXX
 7775 %}
 7776 
 7777 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7778 %{
 7779   match(Set dst src);  // pointer constant matching immP0, produced by xor-ing the register with itself
 7780   effect(KILL cr);  // the xor clobbers the flags register
 7781
 7782   ins_cost(50);
 7783   format %{ "xorl    $dst, $dst\t# ptr" %}
 7784   ins_encode %{
 7785     __ xorl($dst$$Register, $dst$$Register);  // xorl (not xorq) is used for the pointer register
 7786   %}
 7787   ins_pipe(ialu_reg);
 7788 %}
 7789 
 7790 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7791 %{
 7792   match(Set dst src);  // pointer constant described by the immP31 operand (format: positive 32-bit)
 7793   effect(KILL cr);  // NOTE(review): only a movl is emitted below; confirm whether the flags kill is actually required
 7794
 7795   ins_cost(60);
 7796   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7797   ins_encode %{
 7798     __ movl($dst$$Register, $src$$constant);
 7799   %}
 7800   ins_pipe(ialu_reg);
 7801 %}
 7802 
 7803 instruct loadConF(regF dst, immF con) %{
 7804   match(Set dst con);  // general float constant, loaded from the constant table
 7805   ins_cost(125);
 7806   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7807   ins_encode %{
 7808     __ movflt($dst$$XMMRegister, $constantaddress($con));  // $constantaddress yields the table address of $con
 7809   %}
 7810   ins_pipe(pipe_slow);
 7811 %}
 7812 
 7813 instruct loadConH(regF dst, immH con) %{
 7814   match(Set dst con);  // half-float constant (immH) loaded into a regF from the constant table
 7815   ins_cost(125);
 7816   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7817   ins_encode %{
 7818     __ movflt($dst$$XMMRegister, $constantaddress($con));  // same helper and register class as loadConF
 7819   %}
 7820   ins_pipe(pipe_slow);
 7821 %}
 7822 
 7823 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7824   match(Set dst src);  // compressed null pointer constant, produced by xor-ing the register with itself
 7825   effect(KILL cr);  // the xor clobbers the flags register
 7826   format %{ "xorq    $dst, $dst\t# compressed null pointer" %}
 7827   ins_encode %{
 7828     __ xorq($dst$$Register, $dst$$Register);  // both operands are $dst; the format string now prints the same
 7829   %}
 7830   ins_pipe(ialu_reg);
 7831 %}
 7832 
 7833 instruct loadConN(rRegN dst, immN src) %{
 7834   match(Set dst src);  // general compressed-oop constant
 7835
 7836   ins_cost(125);
 7837   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7838   ins_encode %{
 7839     address con = (address)$src$$constant;
 7840     if (con == nullptr) {
 7841       ShouldNotReachHere();  // a null constant is treated as a fatal error here; presumably loadConN0 handles it - TODO confirm
 7842     } else {
 7843       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);  // constant is handed to the helper as a jobject
 7844     }
 7845   %}
 7846   ins_pipe(ialu_reg_fat); // XXX
 7847 %}
 7848 
 7849 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7850   match(Set dst src);  // compressed klass-pointer constant
 7851
 7852   ins_cost(125);
 7853   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7854   ins_encode %{
 7855     address con = (address)$src$$constant;
 7856     if (con == nullptr) {
 7857       ShouldNotReachHere();  // a null klass constant is treated as a fatal error
 7858     } else {
 7859       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);  // constant is handed to the helper as a Klass*
 7860     }
 7861   %}
 7862   ins_pipe(ialu_reg_fat); // XXX
 7863 %}
 7864 
 7865 instruct loadConF0(regF dst, immF0 src)
 7866 %{
 7867   match(Set dst src);  // float constant matching immF0, produced without a constant-table load
 7868   ins_cost(100);  // cheaper than loadConF (125)
 7869
 7870   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7871   ins_encode %{
 7872     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);  // xor of the register with itself
 7873   %}
 7874   ins_pipe(pipe_slow);
 7875 %}
 7876 
 7877 // Use the same format since predicate() cannot be used here.
 7878 instruct loadConD(regD dst, immD con) %{
 7879   match(Set dst con);  // general double constant, loaded from the constant table
 7880   ins_cost(125);
 7881   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7882   ins_encode %{
 7883     __ movdbl($dst$$XMMRegister, $constantaddress($con));  // format always prints movsd; the instruction is chosen inside movdbl
 7884   %}
 7885   ins_pipe(pipe_slow);
 7886 %}
 7887 
 7888 instruct loadConD0(regD dst, immD0 src)
 7889 %{
 7890   match(Set dst src);  // double constant matching immD0, produced without a constant-table load
 7891   ins_cost(100);  // cheaper than loadConD (125)
 7892
 7893   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7894   ins_encode %{
 7895     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);  // xor of the register with itself
 7896   %}
 7897   ins_pipe(pipe_slow);
 7898 %}
 7899 
 7900 instruct loadSSI(rRegI dst, stackSlotI src)
 7901 %{
 7902   match(Set dst src);  // copy an int stack slot into an int register
 7903
 7904   ins_cost(125);
 7905   format %{ "movl    $dst, $src\t# int stk" %}
 7906   ins_encode %{
 7907     __ movl($dst$$Register, $src$$Address);  // the stack-slot operand supplies the address
 7908   %}
 7909   ins_pipe(ialu_reg_mem);
 7910 %}
 7911 
 7912 instruct loadSSL(rRegL dst, stackSlotL src)
 7913 %{
 7914   match(Set dst src);  // copy a long stack slot into a long register
 7915
 7916   ins_cost(125);
 7917   format %{ "movq    $dst, $src\t# long stk" %}
 7918   ins_encode %{
 7919     __ movq($dst$$Register, $src$$Address);  // the stack-slot operand supplies the address
 7920   %}
 7921   ins_pipe(ialu_reg_mem);
 7922 %}
 7923 
 7924 instruct loadSSP(rRegP dst, stackSlotP src)
 7925 %{
 7926   match(Set dst src);  // copy a pointer stack slot into a pointer register
 7927
 7928   ins_cost(125);
 7929   format %{ "movq    $dst, $src\t# ptr stk" %}
 7930   ins_encode %{
 7931     __ movq($dst$$Register, $src$$Address);  // the stack-slot operand supplies the address
 7932   %}
 7933   ins_pipe(ialu_reg_mem);
 7934 %}
 7935 
 7936 instruct loadSSF(regF dst, stackSlotF src)
 7937 %{
 7938   match(Set dst src);  // copy a float stack slot into an XMM register
 7939
 7940   ins_cost(125);
 7941   format %{ "movss   $dst, $src\t# float stk" %}
 7942   ins_encode %{
 7943     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));  // builds the rsp-relative address explicitly instead of using $src$$Address as loadSSI does
 7944   %}
 7945   ins_pipe(pipe_slow); // XXX
 7946 %}
 7947 
 7948 // Use the same format since predicate() cannot be used here.
 7949 instruct loadSSD(regD dst, stackSlotD src)
 7950 %{
 7951   match(Set dst src);  // copy a double stack slot into an XMM register
 7952
 7953   ins_cost(125);
 7954   format %{ "movsd   $dst, $src\t# double stk" %}
 7955   ins_encode  %{
 7956     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // rsp-relative address is built explicitly from the slot displacement
 7957   %}
 7958   ins_pipe(pipe_slow); // XXX
 7959 %}
 7960 
 7961 // Prefetch instructions for allocation; the variant is selected by the value of AllocatePrefetchInstr.
 7962 // Must be safe to execute with an invalid address (cannot fault).
 7963
 7964 instruct prefetchAlloc( memory mem ) %{
 7965   predicate(AllocatePrefetchInstr==3);  // value 3 selects PREFETCHW
 7966   match(PrefetchAllocation mem);  // no Set: the node produces no register result
 7967   ins_cost(125);
 7968
 7969   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7970   ins_encode %{
 7971     __ prefetchw($mem$$Address);
 7972   %}
 7973   ins_pipe(ialu_mem);
 7974 %}
 7975 
 7976 instruct prefetchAllocNTA( memory mem ) %{
 7977   predicate(AllocatePrefetchInstr==0);  // value 0 selects PREFETCHNTA
 7978   match(PrefetchAllocation mem);  // no Set: the node produces no register result
 7979   ins_cost(125);
 7980
 7981   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7982   ins_encode %{
 7983     __ prefetchnta($mem$$Address);
 7984   %}
 7985   ins_pipe(ialu_mem);
 7986 %}
 7987 
 7988 instruct prefetchAllocT0( memory mem ) %{
 7989   predicate(AllocatePrefetchInstr==1);  // value 1 selects PREFETCHT0
 7990   match(PrefetchAllocation mem);  // no Set: the node produces no register result
 7991   ins_cost(125);
 7992
 7993   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7994   ins_encode %{
 7995     __ prefetcht0($mem$$Address);
 7996   %}
 7997   ins_pipe(ialu_mem);
 7998 %}
 7999 
 8000 instruct prefetchAllocT2( memory mem ) %{
 8001   predicate(AllocatePrefetchInstr==2);  // value 2 selects PREFETCHT2
 8002   match(PrefetchAllocation mem);  // no Set: the node produces no register result
 8003   ins_cost(125);
 8004
 8005   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8006   ins_encode %{
 8007     __ prefetcht2($mem$$Address);
 8008   %}
 8009   ins_pipe(ialu_mem);
 8010 %}
 8011 
 8012 //----------Store Instructions-------------------------------------------------
 8013 
 8014 // Store Byte: rule for the ideal StoreB node with a register source; emits one movb.
 8015 instruct storeB(memory mem, rRegI src)
 8016 %{
 8017   match(Set mem (StoreB mem src));
 8018
 8019   ins_cost(125); // XXX
 8020   format %{ "movb    $mem, $src\t# byte" %}
 8021   ins_encode %{
 8022     __ movb($mem$$Address, $src$$Register);  // source value lives in an int register
 8023   %}
 8024   ins_pipe(ialu_mem_reg);
 8025 %}
 8026 
 8027 // Store Char/Short: rule for the ideal StoreC node with a register source; emits one movw.
 8028 instruct storeC(memory mem, rRegI src)
 8029 %{
 8030   match(Set mem (StoreC mem src));
 8031
 8032   ins_cost(125); // XXX
 8033   format %{ "movw    $mem, $src\t# char/short" %}
 8034   ins_encode %{
 8035     __ movw($mem$$Address, $src$$Register);  // source value lives in an int register
 8036   %}
 8037   ins_pipe(ialu_mem_reg);
 8038 %}
 8039 
 8040 // Store Integer: rule for the ideal StoreI node with a register source; emits one movl.
 8041 instruct storeI(memory mem, rRegI src)
 8042 %{
 8043   match(Set mem (StoreI mem src));
 8044
 8045   ins_cost(125); // XXX
 8046   format %{ "movl    $mem, $src\t# int" %}
 8047   ins_encode %{
 8048     __ movl($mem$$Address, $src$$Register);
 8049   %}
 8050   ins_pipe(ialu_mem_reg);
 8051 %}
 8052 
 8053 // Store Long: rule for the ideal StoreL node with a register source; emits one movq.
 8054 instruct storeL(memory mem, rRegL src)
 8055 %{
 8056   match(Set mem (StoreL mem src));
 8057
 8058   ins_cost(125); // XXX
 8059   format %{ "movq    $mem, $src\t# long" %}
 8060   ins_encode %{
 8061     __ movq($mem$$Address, $src$$Register);
 8062   %}
 8063   ins_pipe(ialu_mem_reg); // XXX
 8064 %}
 8065 
 8066 // Store Pointer: movq of a pointer register to memory, only for stores whose barrier data is zero.
 8067 instruct storeP(memory mem, any_RegP src)
 8068 %{
 8069   predicate(n->as_Store()->barrier_data() == 0);  // stores carrying non-zero barrier data do not use this rule
 8070   match(Set mem (StoreP mem src));
 8071
 8072   ins_cost(125); // XXX
 8073   format %{ "movq    $mem, $src\t# ptr" %}
 8074   ins_encode %{
 8075     __ movq($mem$$Address, $src$$Register);  // source uses the any_RegP operand class
 8076   %}
 8077   ins_pipe(ialu_mem_reg);
 8078 %}
 8079 
 8080 instruct storeImmP0(memory mem, immP0 zero)
 8081 %{
 8082   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);  // null heap base and no barrier data
 8083   match(Set mem (StoreP mem zero));  // store of the immP0 pointer constant
 8084
 8085   ins_cost(125); // XXX
 8086   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8087   ins_encode %{
 8088     __ movq($mem$$Address, r12);  // stores r12, which the format string describes as the heap base register holding 0 here
 8089   %}
 8090   ins_pipe(ialu_mem_reg);
 8091 %}
 8092 
 8093 // Store Null Pointer, mark word, or other simple pointer constant (immP31), for stores whose barrier data is zero.
 8094 instruct storeImmP(memory mem, immP31 src)
 8095 %{
 8096   predicate(n->as_Store()->barrier_data() == 0);  // stores carrying non-zero barrier data do not use this rule
 8097   match(Set mem (StoreP mem src));
 8098
 8099   ins_cost(150); // XXX
 8100   format %{ "movq    $mem, $src\t# ptr" %}
 8101   ins_encode %{
 8102     __ movq($mem$$Address, $src$$constant);  // immediate-to-memory form; no register is needed for the constant
 8103   %}
 8104   ins_pipe(ialu_mem_imm);
 8105 %}
 8106 
 8107 // Store Compressed Pointer: movl of a narrow-oop register to memory, only for stores whose barrier data is zero.
 8108 instruct storeN(memory mem, rRegN src)
 8109 %{
 8110   predicate(n->as_Store()->barrier_data() == 0);  // stores carrying non-zero barrier data do not use this rule
 8111   match(Set mem (StoreN mem src));
 8112
 8113   ins_cost(125); // XXX
 8114   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8115   ins_encode %{
 8116     __ movl($mem$$Address, $src$$Register);
 8117   %}
 8118   ins_pipe(ialu_mem_reg);
 8119 %}
 8120 
 8121 instruct storeNKlass(memory mem, rRegN src)
 8122 %{
 8123   match(Set mem (StoreNKlass mem src));  // store of a compressed klass pointer from a register; no predicate
 8124
 8125   ins_cost(125); // XXX
 8126   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8127   ins_encode %{
 8128     __ movl($mem$$Address, $src$$Register);
 8129   %}
 8130   ins_pipe(ialu_mem_reg);
 8131 %}
 8132 
 8133 instruct storeImmN0(memory mem, immN0 zero)
 8134 %{
 8135   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);  // null heap base and no barrier data
 8136   match(Set mem (StoreN mem zero));  // store of the immN0 compressed-pointer constant
 8137
 8138   ins_cost(125); // XXX
 8139   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8140   ins_encode %{
 8141     __ movl($mem$$Address, r12);  // stores r12, which the format string describes as the heap base register holding 0 here
 8142   %}
 8143   ins_pipe(ialu_mem_reg);
 8144 %}
 8145 
 8146 instruct storeImmN(memory mem, immN src)
 8147 %{
 8148   predicate(n->as_Store()->barrier_data() == 0);  // stores carrying non-zero barrier data do not use this rule
 8149   match(Set mem (StoreN mem src));  // store of a compressed-oop constant straight to memory
 8150
 8151   ins_cost(150); // XXX
 8152   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8153   ins_encode %{
 8154     address con = (address)$src$$constant;
 8155     if (con == nullptr) {
 8156       __ movl($mem$$Address, 0);  // null constant: write a literal zero
 8157     } else {
 8158       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);  // non-null constant: let the helper emit the narrow oop
 8159     }
 8160   %}
 8161   ins_pipe(ialu_mem_imm);
 8162 %}
 8163 
 8164 instruct storeImmNKlass(memory mem, immNKlass src)
 8165 %{
 8166   match(Set mem (StoreNKlass mem src));  // store of a compressed klass-pointer constant straight to memory
 8167
 8168   ins_cost(150); // XXX
 8169   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8170   ins_encode %{
 8171     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);  // no null check here, unlike storeImmN
 8172   %}
 8173   ins_pipe(ialu_mem_imm);
 8174 %}
 8175 
 8176 // Store Integer Immediate: the zero case stores R12 when the compressed-oop heap base is null.
 8177 instruct storeImmI0(memory mem, immI_0 zero)
 8178 %{
 8179   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));  // condition under which the format string states R12_heapbase==0
 8180   match(Set mem (StoreI mem zero));
 8181
 8182   ins_cost(125); // XXX
 8183   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8184   ins_encode %{
 8185     __ movl($mem$$Address, r12);  // register store of r12 in place of an immediate zero
 8186   %}
 8187   ins_pipe(ialu_mem_reg);
 8188 %}
 8189 
 8190 instruct storeImmI(memory mem, immI src)
 8191 %{
 8192   match(Set mem (StoreI mem src));  // store of an arbitrary int constant straight to memory
 8193
 8194   ins_cost(150);  // costlier than the register form storeI (125)
 8195   format %{ "movl    $mem, $src\t# int" %}
 8196   ins_encode %{
 8197     __ movl($mem$$Address, $src$$constant);
 8198   %}
 8199   ins_pipe(ialu_mem_imm);
 8200 %}
 8201 
 8202 // Store Long Immediate: the zero case stores R12 when the compressed-oop heap base is null.
 8203 instruct storeImmL0(memory mem, immL0 zero)
 8204 %{
 8205   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));  // condition under which the format string states R12_heapbase==0
 8206   match(Set mem (StoreL mem zero));
 8207
 8208   ins_cost(125); // XXX
 8209   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8210   ins_encode %{
 8211     __ movq($mem$$Address, r12);  // register store of r12 in place of an immediate zero
 8212   %}
 8213   ins_pipe(ialu_mem_reg);
 8214 %}
 8215 
 8216 instruct storeImmL(memory mem, immL32 src)
 8217 %{
 8218   match(Set mem (StoreL mem src));  // only long constants accepted by the immL32 operand are stored as immediates
 8219
 8220   ins_cost(150);  // costlier than the register form storeL (125)
 8221   format %{ "movq    $mem, $src\t# long" %}
 8222   ins_encode %{
 8223     __ movq($mem$$Address, $src$$constant);
 8224   %}
 8225   ins_pipe(ialu_mem_imm);
 8226 %}
 8227 
 8228 // Store Short/Char Immediate: the zero case stores R12 when the compressed-oop heap base is null.
 8229 instruct storeImmC0(memory mem, immI_0 zero)
 8230 %{
 8231   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));  // condition under which the format string states R12_heapbase==0
 8232   match(Set mem (StoreC mem zero));
 8233
 8234   ins_cost(125); // XXX
 8235   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8236   ins_encode %{
 8237     __ movw($mem$$Address, r12);  // register store of r12 in place of an immediate zero
 8238   %}
 8239   ins_pipe(ialu_mem_reg);
 8240 %}
 8241 
 8242 instruct storeImmI16(memory mem, immI16 src)
 8243 %{
 8244   predicate(UseStoreImmI16);  // immediate movw form is only enabled when this flag is set
 8245   match(Set mem (StoreC mem src));  // char/short store of an immI16 constant
 8246
 8247   ins_cost(150);
 8248   format %{ "movw    $mem, $src\t# short/char" %}
 8249   ins_encode %{
 8250     __ movw($mem$$Address, $src$$constant);
 8251   %}
 8252   ins_pipe(ialu_mem_imm);
 8253 %}
 8254 
 8255 // Store Byte Immediate: the zero case stores R12 when the compressed-oop heap base is null.
 8256 instruct storeImmB0(memory mem, immI_0 zero)
 8257 %{
 8258   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));  // condition under which the format string states R12_heapbase==0
 8259   match(Set mem (StoreB mem zero));
 8260
 8261   ins_cost(125); // XXX
 8262   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8263   ins_encode %{
 8264     __ movb($mem$$Address, r12);  // register store of r12 in place of an immediate zero
 8265   %}
 8266   ins_pipe(ialu_mem_reg);
 8267 %}
 8268 
 8269 instruct storeImmB(memory mem, immI8 src)
 8270 %{
 8271   match(Set mem (StoreB mem src));  // byte store of an immI8 constant straight to memory
 8272
 8273   ins_cost(150); // XXX
 8274   format %{ "movb    $mem, $src\t# byte" %}
 8275   ins_encode %{
 8276     __ movb($mem$$Address, $src$$constant);
 8277   %}
 8278   ins_pipe(ialu_mem_imm);
 8279 %}
 8280 
 8281 // Store Float: rule for the ideal StoreF node with an XMM register source; emitted via movflt.
 8282 instruct storeF(memory mem, regF src)
 8283 %{
 8284   match(Set mem (StoreF mem src));
 8285
 8286   ins_cost(95); // XXX
 8287   format %{ "movss   $mem, $src\t# float" %}
 8288   ins_encode %{
 8289     __ movflt($mem$$Address, $src$$XMMRegister);  // macro-assembler helper; the format string prints it as movss
 8290   %}
 8291   ins_pipe(pipe_slow); // XXX
 8292 %}
 8293 
 8294 // Store immediate Float value (it is faster than a store from an XMM register); the zero case stores R12.
 8295 instruct storeF0(memory mem, immF0 zero)
 8296 %{
 8297   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));  // condition under which the format string states R12_heapbase==0
 8298   match(Set mem (StoreF mem zero));
 8299
 8300   ins_cost(25); // XXX
 8301   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8302   ins_encode %{
 8303     __ movl($mem$$Address, r12);  // integer movl of r12 is used instead of an XMM store
 8304   %}
 8305   ins_pipe(ialu_mem_reg);
 8306 %}
 8307 
 8308 instruct storeF_imm(memory mem, immF src)
 8309 %{
 8310   match(Set mem (StoreF mem src));  // store of an arbitrary float constant straight to memory
 8311
 8312   ins_cost(50);  // cheaper than the XMM-register form storeF (95)
 8313   format %{ "movl    $mem, $src\t# float" %}
 8314   ins_encode %{
 8315     __ movl($mem$$Address, jint_cast($src$$constant));  // the float constant is converted with jint_cast and stored with an integer movl
 8316   %}
 8317   ins_pipe(ialu_mem_imm);
 8318 %}
 8319 
 8320 // Store Double
 8321 instruct storeD(memory mem, regD src)
 8322 %{
 8323   match(Set mem (StoreD mem src));
 8324 
 8325   ins_cost(95); // XXX
 8326   format %{ "movsd   $mem, $src\t# double" %}
 8327   ins_encode %{
 8328     __ movdbl($mem$$Address, $src$$XMMRegister);
 8329   %}
 8330   ins_pipe(pipe_slow); // XXX
 8331 %}
 8332 
 8333 // Store immediate double 0.0 (it is faster than store from XMM register)
 8334 instruct storeD0_imm(memory mem, immD0 src)
 8335 %{
 8336   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8337   match(Set mem (StoreD mem src));
 8338 
 8339   ins_cost(50);
 8340   format %{ "movq    $mem, $src\t# double 0." %}
 8341   ins_encode %{
 8342     __ movq($mem$$Address, $src$$constant);
 8343   %}
 8344   ins_pipe(ialu_mem_imm);
 8345 %}
 8346 
 8347 instruct storeD0(memory mem, immD0 zero)
 8348 %{
 8349   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8350   match(Set mem (StoreD mem zero));
 8351 
 8352   ins_cost(25); // XXX
 8353   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8354   ins_encode %{
 8355     __ movq($mem$$Address, r12);
 8356   %}
 8357   ins_pipe(ialu_mem_reg);
 8358 %}
 8359 
 8360 instruct storeSSI(stackSlotI dst, rRegI src)
 8361 %{
 8362   match(Set dst src);
 8363 
 8364   ins_cost(100);
 8365   format %{ "movl    $dst, $src\t# int stk" %}
 8366   ins_encode %{
 8367     __ movl($dst$$Address, $src$$Register);
 8368   %}
 8369   ins_pipe( ialu_mem_reg );
 8370 %}
 8371 
 8372 instruct storeSSL(stackSlotL dst, rRegL src)
 8373 %{
 8374   match(Set dst src);
 8375 
 8376   ins_cost(100);
 8377   format %{ "movq    $dst, $src\t# long stk" %}
 8378   ins_encode %{
 8379     __ movq($dst$$Address, $src$$Register);
 8380   %}
 8381   ins_pipe(ialu_mem_reg);
 8382 %}
 8383 
 8384 instruct storeSSP(stackSlotP dst, rRegP src)
 8385 %{
 8386   match(Set dst src);
 8387 
 8388   ins_cost(100);
 8389   format %{ "movq    $dst, $src\t# ptr stk" %}
 8390   ins_encode %{
 8391     __ movq($dst$$Address, $src$$Register);
 8392   %}
 8393   ins_pipe(ialu_mem_reg);
 8394 %}
 8395 
 8396 instruct storeSSF(stackSlotF dst, regF src)
 8397 %{
 8398   match(Set dst src);
 8399 
 8400   ins_cost(95); // XXX
 8401   format %{ "movss   $dst, $src\t# float stk" %}
 8402   ins_encode %{
 8403     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8404   %}
 8405   ins_pipe(pipe_slow); // XXX
 8406 %}
 8407 
 8408 instruct storeSSD(stackSlotD dst, regD src)
 8409 %{
 8410   match(Set dst src);
 8411 
 8412   ins_cost(95); // XXX
 8413   format %{ "movsd   $dst, $src\t# double stk" %}
 8414   ins_encode %{
 8415     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8416   %}
 8417   ins_pipe(pipe_slow); // XXX
 8418 %}
 8419 
 8420 instruct cacheWB(indirect addr)
 8421 %{
 8422   predicate(VM_Version::supports_data_cache_line_flush());
 8423   match(CacheWB addr);
 8424 
 8425   ins_cost(100);
 8426   format %{"cache wb $addr" %}
 8427   ins_encode %{
 8428     assert($addr->index_position() < 0, "should be");
 8429     assert($addr$$disp == 0, "should be");
 8430     __ cache_wb(Address($addr$$base$$Register, 0));
 8431   %}
 8432   ins_pipe(pipe_slow); // XXX
 8433 %}
 8434 
 8435 instruct cacheWBPreSync()
 8436 %{
 8437   predicate(VM_Version::supports_data_cache_line_flush());
 8438   match(CacheWBPreSync);
 8439 
 8440   ins_cost(100);
 8441   format %{"cache wb presync" %}
 8442   ins_encode %{
 8443     __ cache_wbsync(true);
 8444   %}
 8445   ins_pipe(pipe_slow); // XXX
 8446 %}
 8447 
 8448 instruct cacheWBPostSync()
 8449 %{
 8450   predicate(VM_Version::supports_data_cache_line_flush());
 8451   match(CacheWBPostSync);
 8452 
 8453   ins_cost(100);
 8454   format %{"cache wb postsync" %}
 8455   ins_encode %{
 8456     __ cache_wbsync(false);
 8457   %}
 8458   ins_pipe(pipe_slow); // XXX
 8459 %}
 8460 
 8461 //----------BSWAP Instructions-------------------------------------------------
 8462 instruct bytes_reverse_int(rRegI dst) %{
 8463   match(Set dst (ReverseBytesI dst));
 8464 
 8465   format %{ "bswapl  $dst" %}
 8466   ins_encode %{
 8467     __ bswapl($dst$$Register);
 8468   %}
 8469   ins_pipe( ialu_reg );
 8470 %}
 8471 
 8472 instruct bytes_reverse_long(rRegL dst) %{
 8473   match(Set dst (ReverseBytesL dst));
 8474 
 8475   format %{ "bswapq  $dst" %}
 8476   ins_encode %{
 8477     __ bswapq($dst$$Register);
 8478   %}
 8479   ins_pipe( ialu_reg);
 8480 %}
 8481 
 8482 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8483   match(Set dst (ReverseBytesUS dst));
 8484   effect(KILL cr);
 8485 
 8486   format %{ "bswapl  $dst\n\t"
 8487             "shrl    $dst,16\n\t" %}
 8488   ins_encode %{
 8489     __ bswapl($dst$$Register);
 8490     __ shrl($dst$$Register, 16);
 8491   %}
 8492   ins_pipe( ialu_reg );
 8493 %}
 8494 
 8495 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8496   match(Set dst (ReverseBytesS dst));
 8497   effect(KILL cr);
 8498 
 8499   format %{ "bswapl  $dst\n\t"
 8500             "sar     $dst,16\n\t" %}
 8501   ins_encode %{
 8502     __ bswapl($dst$$Register);
 8503     __ sarl($dst$$Register, 16);
 8504   %}
 8505   ins_pipe( ialu_reg );
 8506 %}
 8507 
 8508 //---------- Zeros Count Instructions ------------------------------------------
 8509 
 8510 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8511   predicate(UseCountLeadingZerosInstruction);
 8512   match(Set dst (CountLeadingZerosI src));
 8513   effect(KILL cr);
 8514 
 8515   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8516   ins_encode %{
 8517     __ lzcntl($dst$$Register, $src$$Register);
 8518   %}
 8519   ins_pipe(ialu_reg);
 8520 %}
 8521 
 8522 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8523   predicate(UseCountLeadingZerosInstruction);
 8524   match(Set dst (CountLeadingZerosI (LoadI src)));
 8525   effect(KILL cr);
 8526   ins_cost(175);
 8527   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8528   ins_encode %{
 8529     __ lzcntl($dst$$Register, $src$$Address);
 8530   %}
 8531   ins_pipe(ialu_reg_mem);
 8532 %}
 8533 
 8534 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8535   predicate(!UseCountLeadingZerosInstruction);
 8536   match(Set dst (CountLeadingZerosI src));
 8537   effect(KILL cr);
 8538 
 8539   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8540             "jnz     skip\n\t"
 8541             "movl    $dst, -1\n"
 8542       "skip:\n\t"
 8543             "negl    $dst\n\t"
 8544             "addl    $dst, 31" %}
 8545   ins_encode %{
 8546     Register Rdst = $dst$$Register;
 8547     Register Rsrc = $src$$Register;
 8548     Label skip;
 8549     __ bsrl(Rdst, Rsrc);
 8550     __ jccb(Assembler::notZero, skip);
 8551     __ movl(Rdst, -1);
 8552     __ bind(skip);
 8553     __ negl(Rdst);
 8554     __ addl(Rdst, BitsPerInt - 1);
 8555   %}
 8556   ins_pipe(ialu_reg);
 8557 %}
 8558 
 8559 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8560   predicate(UseCountLeadingZerosInstruction);
 8561   match(Set dst (CountLeadingZerosL src));
 8562   effect(KILL cr);
 8563 
 8564   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8565   ins_encode %{
 8566     __ lzcntq($dst$$Register, $src$$Register);
 8567   %}
 8568   ins_pipe(ialu_reg);
 8569 %}
 8570 
 8571 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8572   predicate(UseCountLeadingZerosInstruction);
 8573   match(Set dst (CountLeadingZerosL (LoadL src)));
 8574   effect(KILL cr);
 8575   ins_cost(175);
 8576   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8577   ins_encode %{
 8578     __ lzcntq($dst$$Register, $src$$Address);
 8579   %}
 8580   ins_pipe(ialu_reg_mem);
 8581 %}
 8582 
 8583 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8584   predicate(!UseCountLeadingZerosInstruction);
 8585   match(Set dst (CountLeadingZerosL src));
 8586   effect(KILL cr);
 8587 
 8588   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8589             "jnz     skip\n\t"
 8590             "movl    $dst, -1\n"
 8591       "skip:\n\t"
 8592             "negl    $dst\n\t"
 8593             "addl    $dst, 63" %}
 8594   ins_encode %{
 8595     Register Rdst = $dst$$Register;
 8596     Register Rsrc = $src$$Register;
 8597     Label skip;
 8598     __ bsrq(Rdst, Rsrc);
 8599     __ jccb(Assembler::notZero, skip);
 8600     __ movl(Rdst, -1);
 8601     __ bind(skip);
 8602     __ negl(Rdst);
 8603     __ addl(Rdst, BitsPerLong - 1);
 8604   %}
 8605   ins_pipe(ialu_reg);
 8606 %}
 8607 
 8608 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8609   predicate(UseCountTrailingZerosInstruction);
 8610   match(Set dst (CountTrailingZerosI src));
 8611   effect(KILL cr);
 8612 
 8613   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8614   ins_encode %{
 8615     __ tzcntl($dst$$Register, $src$$Register);
 8616   %}
 8617   ins_pipe(ialu_reg);
 8618 %}
 8619 
 8620 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8621   predicate(UseCountTrailingZerosInstruction);
 8622   match(Set dst (CountTrailingZerosI (LoadI src)));
 8623   effect(KILL cr);
 8624   ins_cost(175);
 8625   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8626   ins_encode %{
 8627     __ tzcntl($dst$$Register, $src$$Address);
 8628   %}
 8629   ins_pipe(ialu_reg_mem);
 8630 %}
 8631 
 8632 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8633   predicate(!UseCountTrailingZerosInstruction);
 8634   match(Set dst (CountTrailingZerosI src));
 8635   effect(KILL cr);
 8636 
 8637   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8638             "jnz     done\n\t"
 8639             "movl    $dst, 32\n"
 8640       "done:" %}
 8641   ins_encode %{
 8642     Register Rdst = $dst$$Register;
 8643     Label done;
 8644     __ bsfl(Rdst, $src$$Register);
 8645     __ jccb(Assembler::notZero, done);
 8646     __ movl(Rdst, BitsPerInt);
 8647     __ bind(done);
 8648   %}
 8649   ins_pipe(ialu_reg);
 8650 %}
 8651 
 8652 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8653   predicate(UseCountTrailingZerosInstruction);
 8654   match(Set dst (CountTrailingZerosL src));
 8655   effect(KILL cr);
 8656 
 8657   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8658   ins_encode %{
 8659     __ tzcntq($dst$$Register, $src$$Register);
 8660   %}
 8661   ins_pipe(ialu_reg);
 8662 %}
 8663 
 8664 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8665   predicate(UseCountTrailingZerosInstruction);
 8666   match(Set dst (CountTrailingZerosL (LoadL src)));
 8667   effect(KILL cr);
 8668   ins_cost(175);
 8669   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8670   ins_encode %{
 8671     __ tzcntq($dst$$Register, $src$$Address);
 8672   %}
 8673   ins_pipe(ialu_reg_mem);
 8674 %}
 8675 
 8676 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8677   predicate(!UseCountTrailingZerosInstruction);
 8678   match(Set dst (CountTrailingZerosL src));
 8679   effect(KILL cr);
 8680 
 8681   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8682             "jnz     done\n\t"
 8683             "movl    $dst, 64\n"
 8684       "done:" %}
 8685   ins_encode %{
 8686     Register Rdst = $dst$$Register;
 8687     Label done;
 8688     __ bsfq(Rdst, $src$$Register);
 8689     __ jccb(Assembler::notZero, done);
 8690     __ movl(Rdst, BitsPerLong);
 8691     __ bind(done);
 8692   %}
 8693   ins_pipe(ialu_reg);
 8694 %}
 8695 
 8696 //--------------- Reverse Operation Instructions ----------------
 8697 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8698   predicate(!VM_Version::supports_gfni());
 8699   match(Set dst (ReverseI src));
 8700   effect(TEMP dst, TEMP rtmp, KILL cr);
 8701   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8702   ins_encode %{
 8703     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8704   %}
 8705   ins_pipe( ialu_reg );
 8706 %}
 8707 
 8708 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8709   predicate(VM_Version::supports_gfni());
 8710   match(Set dst (ReverseI src));
 8711   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8712   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8713   ins_encode %{
 8714     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8715   %}
 8716   ins_pipe( ialu_reg );
 8717 %}
 8718 
 8719 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8720   predicate(!VM_Version::supports_gfni());
 8721   match(Set dst (ReverseL src));
 8722   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8723   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8724   ins_encode %{
 8725     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8726   %}
 8727   ins_pipe( ialu_reg );
 8728 %}
 8729 
 8730 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8731   predicate(VM_Version::supports_gfni());
 8732   match(Set dst (ReverseL src));
 8733   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8734   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8735   ins_encode %{
 8736     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8737   %}
 8738   ins_pipe( ialu_reg );
 8739 %}
 8740 
 8741 //---------- Population Count Instructions -------------------------------------
 8742 
 8743 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8744   predicate(UsePopCountInstruction);
 8745   match(Set dst (PopCountI src));
 8746   effect(KILL cr);
 8747 
 8748   format %{ "popcnt  $dst, $src" %}
 8749   ins_encode %{
 8750     __ popcntl($dst$$Register, $src$$Register);
 8751   %}
 8752   ins_pipe(ialu_reg);
 8753 %}
 8754 
 8755 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8756   predicate(UsePopCountInstruction);
 8757   match(Set dst (PopCountI (LoadI mem)));
 8758   effect(KILL cr);
 8759 
 8760   format %{ "popcnt  $dst, $mem" %}
 8761   ins_encode %{
 8762     __ popcntl($dst$$Register, $mem$$Address);
 8763   %}
 8764   ins_pipe(ialu_reg);
 8765 %}
 8766 
 8767 // Note: Long.bitCount(long) returns an int.
 8768 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8769   predicate(UsePopCountInstruction);
 8770   match(Set dst (PopCountL src));
 8771   effect(KILL cr);
 8772 
 8773   format %{ "popcnt  $dst, $src" %}
 8774   ins_encode %{
 8775     __ popcntq($dst$$Register, $src$$Register);
 8776   %}
 8777   ins_pipe(ialu_reg);
 8778 %}
 8779 
 8780 // Note: Long.bitCount(long) returns an int.
 8781 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8782   predicate(UsePopCountInstruction);
 8783   match(Set dst (PopCountL (LoadL mem)));
 8784   effect(KILL cr);
 8785 
 8786   format %{ "popcnt  $dst, $mem" %}
 8787   ins_encode %{
 8788     __ popcntq($dst$$Register, $mem$$Address);
 8789   %}
 8790   ins_pipe(ialu_reg);
 8791 %}
 8792 
 8793 
 8794 //----------MemBar Instructions-----------------------------------------------
 8795 // Memory barrier flavors
 8796 
// Acquire barrier: matches both MemBarAcquire and LoadFence. It has zero
// cost, zero size and an empty encoding, so no machine code is emitted.
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 8808 
// Acquire barrier attached to a lock operation (MemBarAcquireLock). Emits
// nothing; per the format text, the CMPXCHG already issued by FastLock
// makes a separate barrier unnecessary.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 8819 
// Release barrier: matches both MemBarRelease and StoreFence. It has zero
// cost, zero size and an empty encoding, so no machine code is emitted.
instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 8831 
// Release barrier attached to an unlock operation (MemBarReleaseLock).
// Emits nothing; per the format text, the FastUnlock that follows makes a
// separate barrier unnecessary.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 8842 
// StoreLoad barrier (MemBarStoreLoad). Emitted through
// MacroAssembler::membar(Assembler::StoreLoad); the format text shows it as
// a locked add of zero to the top of stack, and the flags register is
// declared killed.
instruct membar_storeload(rFlagsReg cr) %{
  match(MemBarStoreLoad);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
 8857 
// Volatile barrier (MemBarVolatile), general case. Emitted through
// MacroAssembler::membar(Assembler::StoreLoad) at cost 400; the zero-cost
// rule unnecessary_membar_volatile matches the same node when its
// predicate holds.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
 8872 
// Elided volatile barrier: matches MemBarVolatile only when
// Matcher::post_store_load_barrier(n) reports that the node is covered, in
// which case nothing is emitted (zero cost, zero size, empty encoding).
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 8884 
// Full barrier (MemBarFull). Emitted through
// MacroAssembler::membar(Assembler::StoreLoad), the same encoding used by
// membar_storeload and membar_volatile; the flags register is declared
// killed.
instruct membar_full(rFlagsReg cr) %{
  match(MemBarFull);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
 8899 
// StoreStore barrier: matches both MemBarStoreStore and StoreStoreFence.
// It has zero cost, zero size and an empty encoding, so no machine code is
// emitted.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 8910 
 8911 //----------Move Instructions--------------------------------------------------
 8912 
 8913 instruct castX2P(rRegP dst, rRegL src)
 8914 %{
 8915   match(Set dst (CastX2P src));
 8916 
 8917   format %{ "movq    $dst, $src\t# long->ptr" %}
 8918   ins_encode %{
 8919     if ($dst$$reg != $src$$reg) {
 8920       __ movptr($dst$$Register, $src$$Register);
 8921     }
 8922   %}
 8923   ins_pipe(ialu_reg_reg); // XXX
 8924 %}
 8925 
 8926 instruct castP2X(rRegL dst, rRegP src)
 8927 %{
 8928   match(Set dst (CastP2X src));
 8929 
 8930   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8931   ins_encode %{
 8932     if ($dst$$reg != $src$$reg) {
 8933       __ movptr($dst$$Register, $src$$Register);
 8934     }
 8935   %}
 8936   ins_pipe(ialu_reg_reg); // XXX
 8937 %}
 8938 
 8939 // Convert oop into int for vectors alignment masking
 8940 instruct convP2I(rRegI dst, rRegP src)
 8941 %{
 8942   match(Set dst (ConvL2I (CastP2X src)));
 8943 
 8944   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8945   ins_encode %{
 8946     __ movl($dst$$Register, $src$$Register);
 8947   %}
 8948   ins_pipe(ialu_reg_reg); // XXX
 8949 %}
 8950 
 8951 // Convert compressed oop into int for vectors alignment masking
 8952 // in case of 32bit oops (heap < 4Gb).
 8953 instruct convN2I(rRegI dst, rRegN src)
 8954 %{
 8955   predicate(CompressedOops::shift() == 0);
 8956   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8957 
 8958   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8959   ins_encode %{
 8960     __ movl($dst$$Register, $src$$Register);
 8961   %}
 8962   ins_pipe(ialu_reg_reg); // XXX
 8963 %}
 8964 
 8965 // Convert oop pointer into compressed form
 8966 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8967   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8968   match(Set dst (EncodeP src));
 8969   effect(KILL cr);
 8970   format %{ "encode_heap_oop $dst,$src" %}
 8971   ins_encode %{
 8972     Register s = $src$$Register;
 8973     Register d = $dst$$Register;
 8974     if (s != d) {
 8975       __ movq(d, s);
 8976     }
 8977     __ encode_heap_oop(d);
 8978   %}
 8979   ins_pipe(ialu_reg_long);
 8980 %}
 8981 
 8982 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8983   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8984   match(Set dst (EncodeP src));
 8985   effect(KILL cr);
 8986   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8987   ins_encode %{
 8988     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8989   %}
 8990   ins_pipe(ialu_reg_long);
 8991 %}
 8992 
 8993 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8994   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8995             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8996   match(Set dst (DecodeN src));
 8997   effect(KILL cr);
 8998   format %{ "decode_heap_oop $dst,$src" %}
 8999   ins_encode %{
 9000     Register s = $src$$Register;
 9001     Register d = $dst$$Register;
 9002     if (s != d) {
 9003       __ movq(d, s);
 9004     }
 9005     __ decode_heap_oop(d);
 9006   %}
 9007   ins_pipe(ialu_reg_long);
 9008 %}
 9009 
 9010 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9011   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9012             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9013   match(Set dst (DecodeN src));
 9014   effect(KILL cr);
 9015   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9016   ins_encode %{
 9017     Register s = $src$$Register;
 9018     Register d = $dst$$Register;
 9019     if (s != d) {
 9020       __ decode_heap_oop_not_null(d, s);
 9021     } else {
 9022       __ decode_heap_oop_not_null(d);
 9023     }
 9024   %}
 9025   ins_pipe(ialu_reg_long);
 9026 %}
 9027 
 9028 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9029   match(Set dst (EncodePKlass src));
 9030   effect(TEMP dst, KILL cr);
 9031   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9032   ins_encode %{
 9033     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9034   %}
 9035   ins_pipe(ialu_reg_long);
 9036 %}
 9037 
 9038 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9039   match(Set dst (DecodeNKlass src));
 9040   effect(TEMP dst, KILL cr);
 9041   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9042   ins_encode %{
 9043     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9044   %}
 9045   ins_pipe(ialu_reg_long);
 9046 %}
 9047 
 9048 //----------Conditional Move---------------------------------------------------
 9049 // Jump
 9050 // dummy instruction for generating temp registers
 9051 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9052   match(Jump (LShiftL switch_val shift));
 9053   ins_cost(350);
 9054   predicate(false);
 9055   effect(TEMP dest);
 9056 
 9057   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9058             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9059   ins_encode %{
 9060     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9061     // to do that and the compiler is using that register as one it can allocate.
 9062     // So we build it all by hand.
 9063     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9064     // ArrayAddress dispatch(table, index);
 9065     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9066     __ lea($dest$$Register, $constantaddress);
 9067     __ jmp(dispatch);
 9068   %}
 9069   ins_pipe(pipe_jmp);
 9070 %}
 9071 
 9072 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9073   match(Jump (AddL (LShiftL switch_val shift) offset));
 9074   ins_cost(350);
 9075   effect(TEMP dest);
 9076 
 9077   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9078             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9079   ins_encode %{
 9080     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9081     // to do that and the compiler is using that register as one it can allocate.
 9082     // So we build it all by hand.
 9083     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9084     // ArrayAddress dispatch(table, index);
 9085     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9086     __ lea($dest$$Register, $constantaddress);
 9087     __ jmp(dispatch);
 9088   %}
 9089   ins_pipe(pipe_jmp);
 9090 %}
 9091 
 9092 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9093   match(Jump switch_val);
 9094   ins_cost(350);
 9095   effect(TEMP dest);
 9096 
 9097   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9098             "jmp     [$dest + $switch_val]\n\t" %}
 9099   ins_encode %{
 9100     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9101     // to do that and the compiler is using that register as one it can allocate.
 9102     // So we build it all by hand.
 9103     // Address index(noreg, switch_reg, Address::times_1);
 9104     // ArrayAddress dispatch(table, index);
 9105     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9106     __ lea($dest$$Register, $constantaddress);
 9107     __ jmp(dispatch);
 9108   %}
 9109   ins_pipe(pipe_jmp);
 9110 %}
 9111 
 9112 // Conditional move
 9113 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9114 %{
 9115   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9116   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9117 
 9118   ins_cost(100); // XXX
 9119   format %{ "setbn$cop $dst\t# signed, int" %}
 9120   ins_encode %{
 9121     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9122     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9123   %}
 9124   ins_pipe(ialu_reg);
 9125 %}
 9126 
 9127 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9128 %{
 9129   predicate(!UseAPX);
 9130   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9131 
 9132   ins_cost(200); // XXX
 9133   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9134   ins_encode %{
 9135     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9136   %}
 9137   ins_pipe(pipe_cmov_reg);
 9138 %}
 9139 
 9140 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9141 %{
 9142   predicate(UseAPX);
 9143   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9144 
 9145   ins_cost(200);
 9146   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9147   ins_encode %{
 9148     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9149   %}
 9150   ins_pipe(pipe_cmov_reg);
 9151 %}
 9152 
 9153 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9154 %{
 9155   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9156   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9157 
 9158   ins_cost(100); // XXX
 9159   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9160   ins_encode %{
 9161     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9162     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9163   %}
 9164   ins_pipe(ialu_reg);
 9165 %}
 9166 
 9167 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9168   predicate(!UseAPX);
 9169   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9170 
 9171   ins_cost(200); // XXX
 9172   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9173   ins_encode %{
 9174     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9175   %}
 9176   ins_pipe(pipe_cmov_reg);
 9177 %}
 9178 
 9179 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9180   predicate(UseAPX);
 9181   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9182 
 9183   ins_cost(200);
 9184   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9185   ins_encode %{
 9186     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9187   %}
 9188   ins_pipe(pipe_cmov_reg);
 9189 %}
 9190 
 9191 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9192 %{
 9193   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9194   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9195 
 9196   ins_cost(100); // XXX
 9197   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9198   ins_encode %{
 9199     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9200     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9201   %}
 9202   ins_pipe(ialu_reg);
 9203 %}
 9204 
 9205 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9206 %{
 9207   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9208   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9209 
 9210   ins_cost(100); // XXX
 9211   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9212   ins_encode %{
 9213     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9214     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9215   %}
 9216   ins_pipe(ialu_reg);
 9217 %}
 9218 
// CMoveI between two int registers for the rFlagsRegUCF / cmpOpUCF operand
// pair. It has no encoding of its own: it expands into cmovI_regU with the
// same operands.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
 9227 
 9228 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9229   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9230 
 9231   ins_cost(200);
 9232   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9233   ins_encode %{
 9234     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9235   %}
 9236   ins_pipe(pipe_cmov_reg);
 9237 %}
 9238 
// CMoveI for a cmpOpUCF2 comparison whose Bool test is "ne". The move is
// performed when either the parity flag is set or the operands compare
// not-equal, hence two conditional moves of the same source back to back.
instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl  $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    // Move on parity ...
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    // ... or on not-equal.
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
 9252 
// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
//
// Handles the "eq" Bool test with the same parity / not-equal cmov pair as
// cmovI_regUCF2_ne; only the order of the value inputs in the match rule
// differs (src dst here, dst src there).
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  effect(TEMP dst);

  ins_cost(200); // XXX
  format %{ "cmovpl  $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    // Move on parity ...
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    // ... or on not-equal.
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
 9269 
 9270 // Conditional move
 9271 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
        // Int conditional move whose new value is loaded from memory
        // (LoadI src is folded into the cmovl). Two-operand form, selected
        // only when UseAPX is off; condition comes from $cop.
 9272   predicate(!UseAPX);
 9273   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9274
 9275   ins_cost(250); // XXX
 9276   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9277   ins_encode %{
 9278     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9279   %}
 9280   ins_pipe(pipe_cmov_mem);
 9281 %}
 9282 
 9283 // Conditional move
 9284 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9285 %{
        // Three-operand (ndd) int conditional move with a register source
        // and a memory source (LoadI src2 folded in). Selected only when
        // UseAPX is on; emits ecmovl with the condition from $cop.
 9286   predicate(UseAPX);
 9287   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9288
 9289   ins_cost(250);
 9290   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9291   ins_encode %{
 9292     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9293   %}
 9294   ins_pipe(pipe_cmov_mem);
 9295 %}
 9296 
 9297 // Conditional move
 9298 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9299 %{
        // Int conditional move from memory for the unsigned operand pair
        // (cmpOpU / rFlagsRegU). Two-operand form, selected only when UseAPX
        // is off. Also the expansion target of cmovI_memUCF.
 9300   predicate(!UseAPX);
 9301   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9302
 9303   ins_cost(250); // XXX
 9304   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9305   ins_encode %{
 9306     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9307   %}
 9308   ins_pipe(pipe_cmov_mem);
 9309 %}
 9310 
 9311 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
        // Int conditional move from memory for the cmpOpUCF / rFlagsRegUCF
        // operand pair. Carries no encoding of its own: it expands to
        // cmovI_memU with the same operands.
 9312   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9313
 9314   ins_cost(250);
 9315   expand %{
 9316     cmovI_memU(cop, cr, dst, src);
 9317   %}
 9318 %}
 9319 
 9320 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9321 %{
        // Three-operand (ndd) int conditional move, unsigned operand pair,
        // with a register source and a memory source (LoadI src2 folded in).
        // Selected only when UseAPX is on.
 9322   predicate(UseAPX);
 9323   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9324
 9325   ins_cost(250);
 9326   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9327   ins_encode %{
 9328     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9329   %}
 9330   ins_pipe(pipe_cmov_mem);
 9331 %}
 9332 
 9333 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9334 %{
        // Three-operand (ndd) int conditional move for the cmpOpUCFE /
        // rFlagsRegUCFE operand pair, with a register source and a memory
        // source (LoadI src2 folded in). Emits ecmovl with the condition
        // from $cop.
        // NOTE(review): no UseAPX predicate here; presumably the UCFE operands
        // are only produced when APX is enabled - confirm.
 9335   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9336
 9337   ins_cost(250);
 9338   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9339   ins_encode %{
 9340     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9341   %}
 9342   ins_pipe(pipe_cmov_mem);
 9343 %}
 9344 
 9345 // Conditional move
 9346 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9347 %{
        // Compressed-pointer (rRegN) conditional move, register to register.
        // Two-operand form, selected only when UseAPX is off; uses the
        // 32-bit cmovl with the condition from $cop.
 9348   predicate(!UseAPX);
 9349   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9350
 9351   ins_cost(200); // XXX
 9352   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9353   ins_encode %{
 9354     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9355   %}
 9356   ins_pipe(pipe_cmov_reg);
 9357 %}
 9358 
 9359 // Conditional move ndd
 9360 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9361 %{
        // Three-operand (ndd) compressed-pointer conditional move: dst is
        // separate from src1 and src2. Selected only when UseAPX is on;
        // emits ecmovl with the condition from $cop.
 9362   predicate(UseAPX);
 9363   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9364
 9365   ins_cost(200);
 9366   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9367   ins_encode %{
 9368     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9369   %}
 9370   ins_pipe(pipe_cmov_reg);
 9371 %}
 9372 
 9373 // Conditional move
 9374 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9375 %{
        // Compressed-pointer conditional move for the unsigned operand pair
        // (cmpOpU / rFlagsRegU). Two-operand form, selected only when UseAPX
        // is off. Also the expansion target of cmovN_regUCF.
 9376   predicate(!UseAPX);
 9377   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9378
 9379   ins_cost(200); // XXX
 9380   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9381   ins_encode %{
 9382     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9383   %}
 9384   ins_pipe(pipe_cmov_reg);
 9385 %}
 9386 
 9387 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // Compressed-pointer conditional move for the cmpOpUCF /
        // rFlagsRegUCF operand pair. Carries no encoding of its own: it
        // expands to cmovN_regU with the same operands.
 9388   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9389
 9390   ins_cost(200);
 9391   expand %{
 9392     cmovN_regU(cop, cr, dst, src);
 9393   %}
 9394 %}
 9395 
 9396 // Conditional move ndd
 9397 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9398 %{
        // Three-operand (ndd) compressed-pointer conditional move for the
        // unsigned operand pair. Selected only when UseAPX is on; emits
        // ecmovl with the condition from $cop.
 9399   predicate(UseAPX);
 9400   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9401
 9402   ins_cost(200);
 9403   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9404   ins_encode %{
 9405     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9406   %}
 9407   ins_pipe(pipe_cmov_reg);
 9408 %}
 9409 
 9410 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
        // Three-operand (ndd) compressed-pointer conditional move for the
        // cmpOpUCFE / rFlagsRegUCFE operand pair. Emits a single ecmovl with
        // the condition from $cop.
        // NOTE(review): no UseAPX predicate here; presumably the UCFE operands
        // are only produced when APX is enabled - confirm.
 9411   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9412
 9413   ins_cost(200);
 9414   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9415   ins_encode %{
 9416     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9417   %}
 9418   ins_pipe(pipe_cmov_reg);
 9419 %}
 9420 
 9421 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // Compressed-pointer conditional move for cmpOpUCF2 when the Bool
        // test is 'ne'. The condition is not read from $cop: two cmovl
        // instructions are emitted, so dst receives src if either the parity
        // or the notEqual condition holds.
        // Presumably parity flags an unordered floating-point compare - confirm.
 9422   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9423   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9424
 9425   ins_cost(200); // XXX
 9426   format %{ "cmovpl  $dst, $src\n\t"
 9427             "cmovnel $dst, $src" %}
 9428   ins_encode %{
 9429     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9430     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9431   %}
 9432   ins_pipe(pipe_cmov_reg);
 9433 %}
 9434 
 9435 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9436 // inputs of the CMove
 9437 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // Compressed-pointer conditional move for cmpOpUCF2 when the Bool
        // test is 'eq'. The match swaps the value inputs (Binary src dst) so
        // the same parity/notEqual cmovl pair as the 'ne' rule can be emitted.
 9438   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9439   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9440
 9441   ins_cost(200); // XXX
 9442   format %{ "cmovpl  $dst, $src\n\t"
 9443             "cmovnel $dst, $src" %}
 9444   ins_encode %{
 9445     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9446     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9447   %}
 9448   ins_pipe(pipe_cmov_reg);
 9449 %}
 9450 
 9451 // Conditional move
 9452 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9453 %{
        // Pointer (rRegP) conditional move, register to register.
        // Two-operand form, selected only when UseAPX is off; uses the
        // 64-bit cmovq with the condition from $cop.
 9454   predicate(!UseAPX);
 9455   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9456
 9457   ins_cost(200); // XXX
 9458   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9459   ins_encode %{
 9460     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9461   %}
 9462   ins_pipe(pipe_cmov_reg);  // XXX
 9463 %}
 9464 
 9465 // Conditional move ndd
 9466 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9467 %{
        // Three-operand (ndd) pointer conditional move: dst is separate from
        // src1 and src2. Selected only when UseAPX is on; emits ecmovq with
        // the condition from $cop.
 9468   predicate(UseAPX);
 9469   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9470
 9471   ins_cost(200);
 9472   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9473   ins_encode %{
 9474     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9475   %}
 9476   ins_pipe(pipe_cmov_reg);
 9477 %}
 9478 
 9479 // Conditional move
 9480 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9481 %{
        // Pointer conditional move for the unsigned operand pair
        // (cmpOpU / rFlagsRegU). Two-operand form, selected only when UseAPX
        // is off. Also the expansion target of cmovP_regUCF.
 9482   predicate(!UseAPX);
 9483   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9484
 9485   ins_cost(200); // XXX
 9486   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9487   ins_encode %{
 9488     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9489   %}
 9490   ins_pipe(pipe_cmov_reg); // XXX
 9491 %}
 9492 
 9493 // Conditional move ndd
 9494 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9495 %{
        // Three-operand (ndd) pointer conditional move for the unsigned
        // operand pair. Selected only when UseAPX is on; emits ecmovq with
        // the condition from $cop.
 9496   predicate(UseAPX);
 9497   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9498
 9499   ins_cost(200);
 9500   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9501   ins_encode %{
 9502     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9503   %}
 9504   ins_pipe(pipe_cmov_reg);
 9505 %}
 9506 
 9507 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // Pointer conditional move for the cmpOpUCF / rFlagsRegUCF operand
        // pair. Carries no encoding of its own: it expands to cmovP_regU
        // with the same operands.
 9508   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9509
 9510   ins_cost(200);
 9511   expand %{
 9512     cmovP_regU(cop, cr, dst, src);
 9513   %}
 9514 %}
 9515 
 9516 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
        // Three-operand (ndd) pointer conditional move for the cmpOpUCFE /
        // rFlagsRegUCFE operand pair. Emits a single ecmovq with the
        // condition from $cop.
        // NOTE(review): no UseAPX predicate here; presumably the UCFE operands
        // are only produced when APX is enabled - confirm.
 9517   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9518
 9519   ins_cost(200);
 9520   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9521   ins_encode %{
 9522     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9523   %}
 9524   ins_pipe(pipe_cmov_reg);
 9525 %}
 9526 
 9527 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // Pointer conditional move for cmpOpUCF2 when the Bool test is 'ne'.
        // The condition is not read from $cop: two cmovq instructions are
        // emitted, so dst receives src if either the parity or the notEqual
        // condition holds.
        // Presumably parity flags an unordered floating-point compare - confirm.
 9528   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9529   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9530
 9531   ins_cost(200); // XXX
 9532   format %{ "cmovpq  $dst, $src\n\t"
 9533             "cmovneq $dst, $src" %}
 9534   ins_encode %{
 9535     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9536     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9537   %}
 9538   ins_pipe(pipe_cmov_reg);
 9539 %}
 9540 
 9541 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9542 // inputs of the CMove
 9543 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // Pointer conditional move for cmpOpUCF2 when the Bool test is 'eq'.
        // The match swaps the value inputs (Binary src dst) so the same
        // parity/notEqual cmovq pair as the 'ne' rule can be emitted.
 9544   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9545   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9546
 9547   ins_cost(200); // XXX
 9548   format %{ "cmovpq  $dst, $src\n\t"
 9549             "cmovneq $dst, $src" %}
 9550   ins_encode %{
 9551     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9552     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9553   %}
 9554   ins_pipe(pipe_cmov_reg);
 9555 %}
 9556 
 9557 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9558 %{
        // Long select between the constant 1 (src, immL1) and the constant 0:
        // the predicate requires the second value input of the CMoveL to be
        // the long constant 0. Encoded as one setb on the negated condition
        // rather than a cmov. The negation follows from the register rules
        // in this file, where the second Binary value is the one chosen when
        // the condition holds - here that value is 0.
        // Presumably dst already holds 0 so that setting the low byte is
        // sufficient - confirm against setb.
 9559   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9560   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9561
 9562   ins_cost(100); // XXX
 9563   format %{ "setbn$cop $dst\t# signed, long" %}
 9564   ins_encode %{
 9565     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9566     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9567   %}
 9568   ins_pipe(ialu_reg);
 9569 %}
 9570 
 9571 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9572 %{
        // Long conditional move, register to register. Two-operand form,
        // selected only when UseAPX is off; emits cmovq with the condition
        // from $cop.
 9573   predicate(!UseAPX);
 9574   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9575
 9576   ins_cost(200); // XXX
 9577   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9578   ins_encode %{
 9579     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9580   %}
 9581   ins_pipe(pipe_cmov_reg);  // XXX
 9582 %}
 9583 
 9584 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9585 %{
        // Three-operand (ndd) long conditional move: dst is separate from
        // src1 and src2. Selected only when UseAPX is on; emits ecmovq with
        // the condition from $cop.
 9586   predicate(UseAPX);
 9587   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9588
 9589   ins_cost(200);
 9590   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9591   ins_encode %{
 9592     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9593   %}
 9594   ins_pipe(pipe_cmov_reg);
 9595 %}
 9596 
 9597 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9598 %{
        // Long conditional move whose new value is loaded from memory
        // (LoadL src is folded into the cmovq). Two-operand form, selected
        // only when UseAPX is off.
 9599   predicate(!UseAPX);
 9600   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9601
 9602   ins_cost(200); // XXX
 9603   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9604   ins_encode %{
 9605     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9606   %}
 9607   ins_pipe(pipe_cmov_mem);  // XXX
 9608 %}
 9609 
 9610 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9611 %{
        // Three-operand (ndd) long conditional move with a register source
        // and a memory source (LoadL src2 folded in). Selected only when
        // UseAPX is on; emits ecmovq with the condition from $cop.
 9612   predicate(UseAPX);
 9613   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9614
 9615   ins_cost(200);
 9616   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9617   ins_encode %{
 9618     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9619   %}
 9620   ins_pipe(pipe_cmov_mem);
 9621 %}
 9622 
 9623 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9624 %{
        // Unsigned-operand variant (cmpOpU / rFlagsRegU) of the long select
        // between the constants 1 (src, immL1) and 0. The predicate requires
        // the second value input of the CMoveL to be the long constant 0;
        // the result is produced with one setb on the negated condition.
 9625   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9626   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9627
 9628   ins_cost(100); // XXX
 9629   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9630   ins_encode %{
 9631     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9632     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9633   %}
 9634   ins_pipe(ialu_reg);
 9635 %}
 9636 
 9637 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9638 %{
        // Long conditional move for the unsigned operand pair
        // (cmpOpU / rFlagsRegU). Two-operand form, selected only when UseAPX
        // is off. Also the expansion target of cmovL_regUCF.
 9639   predicate(!UseAPX);
 9640   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9641
 9642   ins_cost(200); // XXX
 9643   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9644   ins_encode %{
 9645     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9646   %}
 9647   ins_pipe(pipe_cmov_reg); // XXX
 9648 %}
 9649 
 9650 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9651 %{
        // Three-operand (ndd) long conditional move for the unsigned operand
        // pair. Selected only when UseAPX is on; emits ecmovq with the
        // condition from $cop.
 9652   predicate(UseAPX);
 9653   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9654
 9655   ins_cost(200);
 9656   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9657   ins_encode %{
 9658     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9659   %}
 9660   ins_pipe(pipe_cmov_reg);
 9661 %}
 9662 
 9663 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9664 %{
        // cmpOpUCF / rFlagsRegUCF variant of the long select between the
        // constants 1 (src, immL1) and 0. The predicate requires the second
        // value input of the CMoveL to be the long constant 0; the result is
        // produced with one setb on the negated condition.
 9665   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9666   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9667
 9668   ins_cost(100); // XXX
 9669   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9670   ins_encode %{
 9671     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9672     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9673   %}
 9674   ins_pipe(ialu_reg);
 9675 %}
 9676 
 9677 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9678 %{
        // cmpOpUCFE / rFlagsRegUCFE variant of the long select between the
        // constants 1 (src, immL1) and 0. The predicate requires the second
        // value input of the CMoveL to be the long constant 0; the result is
        // produced with one setb on the negated condition.
 9679   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9680   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9681
 9682   ins_cost(100); // XXX
 9683   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9684   ins_encode %{
 9685     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9686     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9687   %}
 9688   ins_pipe(ialu_reg);
 9689 %}
 9690 
 9691 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // Long conditional move for the cmpOpUCF / rFlagsRegUCF operand
        // pair. Carries no encoding of its own: it expands to cmovL_regU
        // with the same operands.
 9692   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9693
 9694   ins_cost(200);
 9695   expand %{
 9696     cmovL_regU(cop, cr, dst, src);
 9697   %}
 9698 %}
 9699 
 9700 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9701 %{
        // Three-operand (ndd) long conditional move for the cmpOpUCFE /
        // rFlagsRegUCFE operand pair. Emits a single ecmovq with the
        // condition from $cop.
        // NOTE(review): no UseAPX predicate here; presumably the UCFE operands
        // are only produced when APX is enabled - confirm.
 9702   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9703
 9704   ins_cost(200);
 9705   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9706   ins_encode %{
 9707     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9708   %}
 9709   ins_pipe(pipe_cmov_reg);
 9710 %}
 9711 
 9712 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // Long conditional move for cmpOpUCF2 when the Bool test is 'ne'.
        // The condition is not read from $cop: two cmovq instructions are
        // emitted, so dst receives src if either the parity or the notEqual
        // condition holds.
        // Presumably parity flags an unordered floating-point compare - confirm.
 9713   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9714   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9715
 9716   ins_cost(200); // XXX
 9717   format %{ "cmovpq  $dst, $src\n\t"
 9718             "cmovneq $dst, $src" %}
 9719   ins_encode %{
 9720     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9721     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9722   %}
 9723   ins_pipe(pipe_cmov_reg);
 9724 %}
 9725 
 9726 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9727 // inputs of the CMove
 9728 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // Long conditional move for cmpOpUCF2 when the Bool test is 'eq'.
        // The match swaps the value inputs (Binary src dst) so the same
        // parity/notEqual cmovq pair as the 'ne' rule can be emitted.
 9729   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9730   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9731
 9732   ins_cost(200); // XXX
 9733   format %{ "cmovpq  $dst, $src\n\t"
 9734             "cmovneq $dst, $src" %}
 9735   ins_encode %{
 9736     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9737     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9738   %}
 9739   ins_pipe(pipe_cmov_reg);
 9740 %}
 9741 
 9742 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9743 %{
        // Long conditional move from memory for the unsigned operand pair
        // (cmpOpU / rFlagsRegU). Two-operand form, selected only when UseAPX
        // is off. Also the expansion target of cmovL_memUCF.
 9744   predicate(!UseAPX);
 9745   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9746
 9747   ins_cost(200); // XXX
 9748   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9749   ins_encode %{
 9750     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9751   %}
 9752   ins_pipe(pipe_cmov_mem); // XXX
 9753 %}
 9754 
 9755 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
        // Long conditional move from memory for the cmpOpUCF / rFlagsRegUCF
        // operand pair. Carries no encoding of its own: it expands to
        // cmovL_memU with the same operands.
 9756   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9757
 9758   ins_cost(200);
 9759   expand %{
 9760     cmovL_memU(cop, cr, dst, src);
 9761   %}
 9762 %}
 9763 
 9764 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9765 %{
        // Three-operand (ndd) long conditional move, unsigned operand pair,
        // with a register source and a memory source (LoadL src2 folded in).
        // Selected only when UseAPX is on.
 9766   predicate(UseAPX);
 9767   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9768
 9769   ins_cost(200);
 9770   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9771   ins_encode %{
 9772     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9773   %}
 9774   ins_pipe(pipe_cmov_mem);
 9775 %}
 9776 
 9777 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9778 %{
        // Three-operand (ndd) long conditional move for the cmpOpUCFE /
        // rFlagsRegUCFE operand pair, with a register source and a memory
        // source (LoadL src2 folded in). Emits ecmovq with the condition
        // from $cop.
        // NOTE(review): no UseAPX predicate here; presumably the UCFE operands
        // are only produced when APX is enabled - confirm.
 9779   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9780
 9781   ins_cost(200);
 9782   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9783   ins_encode %{
 9784     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9785   %}
 9786   ins_pipe(pipe_cmov_mem);
 9787 %}
 9788 
 9789 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9790 %{
        // Float conditional move between XMM registers (signed condition
        // operands, per the format string). No cmov is used here: the
        // encoding is a short branch (jccb) on the inverted condition that
        // jumps over a movflt dst <- src.
 9791   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9792
 9793   ins_cost(200); // XXX
 9794   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9795             "movss     $dst, $src\n"
 9796     "skip:" %}
 9797   ins_encode %{
 9798     Label Lskip;
 9799     // Invert sense of branch from sense of CMOV
        // (the inversion is done by flipping the low bit of the condition code)
 9800     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9801     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9802     __ bind(Lskip);
 9803   %}
 9804   ins_pipe(pipe_slow);
 9805 %}
 9806 
 9807 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9808 %{
        // Float conditional move between XMM registers for the unsigned
        // operand pair (cmpOpU / rFlagsRegU). Encoded as a short branch
        // (jccb) on the inverted condition that jumps over a movflt
        // dst <- src. Also the expansion target of cmovF_regUCF.
 9809   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9810
 9811   ins_cost(200); // XXX
 9812   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9813             "movss     $dst, $src\n"
 9814     "skip:" %}
 9815   ins_encode %{
 9816     Label Lskip;
 9817     // Invert sense of branch from sense of CMOV
        // (the inversion is done by flipping the low bit of the condition code)
 9818     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9819     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9820     __ bind(Lskip);
 9821   %}
 9822   ins_pipe(pipe_slow);
 9823 %}
 9824 
 9825 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
        // Float conditional move for the cmpOpUCF / rFlagsRegUCF operand
        // pair. Carries no encoding of its own: it expands to cmovF_regU
        // with the same operands.
 9826   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9827
 9828   ins_cost(200);
 9829   expand %{
 9830     cmovF_regU(cop, cr, dst, src);
 9831   %}
 9832 %}
 9833 
 9834 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9835 %{
        // Float conditional move between XMM registers for the cmpOpUCFE /
        // rFlagsRegUCFE operand pair. Encoded as a short branch (jccb) on
        // the inverted condition that jumps over a movflt dst <- src.
 9836   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9837
 9838   ins_cost(200); // XXX
 9839   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9840             "movss     $dst, $src\n"
 9841     "skip:" %}
 9842   ins_encode %{
 9843     Label Lskip;
 9844     // Invert sense of branch from sense of CMOV
        // (the inversion is done by flipping the low bit of the condition code)
 9845     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9846     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9847     __ bind(Lskip);
 9848   %}
 9849   ins_pipe(pipe_slow);
 9850 %}
 9851 
 9852 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9853 %{
        // Double conditional move between XMM registers (signed condition
        // operands, per the format string). No cmov is used here: the
        // encoding is a short branch (jccb) on the inverted condition that
        // jumps over a movdbl dst <- src.
 9854   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9855
 9856   ins_cost(200); // XXX
 9857   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9858             "movsd     $dst, $src\n"
 9859     "skip:" %}
 9860   ins_encode %{
 9861     Label Lskip;
 9862     // Invert sense of branch from sense of CMOV
        // (the inversion is done by flipping the low bit of the condition code)
 9863     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9864     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9865     __ bind(Lskip);
 9866   %}
 9867   ins_pipe(pipe_slow);
 9868 %}
 9869 
 9870 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9871 %{
        // Double conditional move between XMM registers for the unsigned
        // operand pair (cmpOpU / rFlagsRegU). Encoded as a short branch
        // (jccb) on the inverted condition that jumps over a movdbl
        // dst <- src. Also the expansion target of cmovD_regUCF.
 9872   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9873
 9874   ins_cost(200); // XXX
 9875   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9876             "movsd     $dst, $src\n"
 9877     "skip:" %}
 9878   ins_encode %{
 9879     Label Lskip;
 9880     // Invert sense of branch from sense of CMOV
        // (the inversion is done by flipping the low bit of the condition code)
 9881     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9882     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9883     __ bind(Lskip);
 9884   %}
 9885   ins_pipe(pipe_slow);
 9886 %}
 9887 
 9888 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
        // Double conditional move for the cmpOpUCF / rFlagsRegUCF operand
        // pair. Carries no encoding of its own: it expands to cmovD_regU
        // with the same operands.
 9889   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9890
 9891   ins_cost(200);
 9892   expand %{
 9893     cmovD_regU(cop, cr, dst, src);
 9894   %}
 9895 %}
 9896 
 9897 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9898 %{
        // Double conditional move between XMM registers for the cmpOpUCFE /
        // rFlagsRegUCFE operand pair. Encoded as a short branch (jccb) on
        // the inverted condition that jumps over a movdbl dst <- src.
 9899   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9900
 9901   ins_cost(200); // XXX
 9902   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9903             "movsd     $dst, $src\n"
 9904     "skip:" %}
 9905   ins_encode %{
 9906     Label Lskip;
 9907     // Invert sense of branch from sense of CMOV
        // (the inversion is done by flipping the low bit of the condition code)
 9908     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9909     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9910     __ bind(Lskip);
 9911   %}
 9912   ins_pipe(pipe_slow);
 9913 %}
 9914 
 9915 //----------Arithmetic Instructions--------------------------------------------
 9916 //----------Addition Instructions----------------------------------------------
 9917 
 9918 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9919 %{
        // Int add, register += register (two-operand addl). Selected only
        // when UseAPX is off. The flags register is clobbered (KILL cr) and
        // the flag() list records which condition flags the instruction sets.
 9920   predicate(!UseAPX);
 9921   match(Set dst (AddI dst src));
 9922   effect(KILL cr);
 9923   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9924   format %{ "addl    $dst, $src\t# int" %}
 9925   ins_encode %{
 9926     __ addl($dst$$Register, $src$$Register);
 9927   %}
 9928   ins_pipe(ialu_reg_reg);
 9929 %}
 9930 
 9931 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9932 %{
        // Int add in three-operand (ndd) form: dst = src1 + src2 via eaddl.
        // Selected only when UseAPX is on; clobbers the flags register.
        // Both source operands are tagged Flag_ndd_demotable.
        // The trailing 'false' passed to eaddl is presumably its no-flags
        // argument - confirm against the assembler declaration.
 9933   predicate(UseAPX);
 9934   match(Set dst (AddI src1 src2));
 9935   effect(KILL cr);
 9936   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9937
 9938   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9939   ins_encode %{
 9940     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9941   %}
 9942   ins_pipe(ialu_reg_reg);
 9943 %}
 9944 
 9945 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9946 %{
        // Int add, register += immediate (two-operand addl). Selected only
        // when UseAPX is off; clobbers the flags register.
 9947   predicate(!UseAPX);
 9948   match(Set dst (AddI dst src));
 9949   effect(KILL cr);
 9950   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9951
 9952   format %{ "addl    $dst, $src\t# int" %}
 9953   ins_encode %{
 9954     __ addl($dst$$Register, $src$$constant);
 9955   %}
 9956   ins_pipe( ialu_reg );
 9957 %}
 9958 
 9959 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9960 %{
        // Int add in three-operand (ndd) form: dst = src1 + immediate via
        // eaddl. Selected only when UseAPX is on; clobbers the flags
        // register. Only the register source (operand 1) is tagged
        // Flag_ndd_demotable.
 9961   predicate(UseAPX);
 9962   match(Set dst (AddI src1 src2));
 9963   effect(KILL cr);
 9964   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9965
 9966   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9967   ins_encode %{
 9968     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9969   %}
 9970   ins_pipe( ialu_reg );
 9971 %}
 9972 
 9973 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9974 %{
        // Int add in three-operand (ndd) form with the load folded in:
        // dst = [src1] + immediate via eaddl. Selected only when UseAPX is
        // on; clobbers the flags register. No Flag_ndd_demotable tag is set.
 9975   predicate(UseAPX);
 9976   match(Set dst (AddI (LoadI src1) src2));
 9977   effect(KILL cr);
 9978   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9979
 9980   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9981   ins_encode %{
 9982     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9983   %}
 9984   ins_pipe( ialu_reg );
 9985 %}
 9986 
 9987 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9988 %{
        // Int add, register += memory operand (LoadI src folded into addl).
        // Selected only when UseAPX is off; clobbers the flags register.
 9989   predicate(!UseAPX);
 9990   match(Set dst (AddI dst (LoadI src)));
 9991   effect(KILL cr);
 9992   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9993
 9994   ins_cost(150); // XXX
 9995   format %{ "addl    $dst, $src\t# int" %}
 9996   ins_encode %{
 9997     __ addl($dst$$Register, $src$$Address);
 9998   %}
 9999   ins_pipe(ialu_reg_mem);
10000 %}
10001 
10002 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10003 %{
        // Int add in three-operand (ndd) form: dst = src1 + [src2] via
        // eaddl, with LoadI src2 folded in. Selected only when UseAPX is on;
        // clobbers the flags register.
        // NOTE(review): Flag_ndd_demotable_opr2 is set although src2 is a
        // memory operand, while addI_rReg_rReg_imm_ndd tags only opr1 -
        // confirm that tagging operand 2 is intended here.
10004   predicate(UseAPX);
10005   match(Set dst (AddI src1 (LoadI src2)));
10006   effect(KILL cr);
10007   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10008
10009   ins_cost(150);
10010   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10011   ins_encode %{
10012     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10013   %}
10014   ins_pipe(ialu_reg_mem);
10015 %}
10016 
10017 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10018 %{
        // Read-modify-write int add on memory: matches a StoreI back to dst
        // of (LoadI dst) + src and emits a single addl with a memory
        // destination. No UseAPX predicate; clobbers the flags register.
10019   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10020   effect(KILL cr);
10021   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10022
10023   ins_cost(150); // XXX
10024   format %{ "addl    $dst, $src\t# int" %}
10025   ins_encode %{
10026     __ addl($dst$$Address, $src$$Register);
10027   %}
10028   ins_pipe(ialu_mem_reg);
10029 %}
10030 
10031 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10032 %{
        // Read-modify-write int add of an immediate on memory: matches a
        // StoreI back to dst of (LoadI dst) + src and emits a single addl
        // with a memory destination. No UseAPX predicate; clobbers the
        // flags register.
10033   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10034   effect(KILL cr);
10035   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10036
10037
10038   ins_cost(125); // XXX
10039   format %{ "addl    $dst, $src\t# int" %}
10040   ins_encode %{
10041     __ addl($dst$$Address, $src$$constant);
10042   %}
10043   ins_pipe(ialu_mem_imm);
10044 %}
10045 
10046 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10047 %{
        // Int increment of a register: AddI of dst with the constant 1
        // (immI_1), emitted through incrementl. Selected only when UseAPX is
        // off and UseIncDec is on; clobbers the flags register. Unlike the
        // addl rules, no flag() list is declared.
10048   predicate(!UseAPX && UseIncDec);
10049   match(Set dst (AddI dst src));
10050   effect(KILL cr);
10051
10052   format %{ "incl    $dst\t# int" %}
10053   ins_encode %{
10054     __ incrementl($dst$$Register);
10055   %}
10056   ins_pipe(ialu_reg);
10057 %}
10058 
10059 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10060 %{
        // Int increment in two-register (ndd) form: dst = src + 1 via eincl.
        // Selected only when both UseAPX and UseIncDec are on; clobbers the
        // flags register. The source register (operand 1) is tagged
        // Flag_ndd_demotable.
10061   predicate(UseAPX && UseIncDec);
10062   match(Set dst (AddI src val));
10063   effect(KILL cr);
10064   flag(PD::Flag_ndd_demotable_opr1);
10065
10066   format %{ "eincl    $dst, $src\t# int ndd" %}
10067   ins_encode %{
10068     __ eincl($dst$$Register, $src$$Register, false);
10069   %}
10070   ins_pipe(ialu_reg);
10071 %}
10072 
10073 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10074 %{
        // Int increment in ndd form with the load folded in:
        // dst = [src] + 1 via eincl. Selected only when both UseAPX and
        // UseIncDec are on; clobbers the flags register.
10075   predicate(UseAPX && UseIncDec);
10076   match(Set dst (AddI (LoadI src) val));
10077   effect(KILL cr);
10078
10079   format %{ "eincl    $dst, $src\t# int ndd" %}
10080   ins_encode %{
10081     __ eincl($dst$$Register, $src$$Address, false);
10082   %}
10083   ins_pipe(ialu_reg);
10084 %}
10085 
10086 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10087 %{
        // Read-modify-write int increment on memory: matches a StoreI back
        // to dst of (LoadI dst) + 1 and emits incrementl on the address.
        // Selected only when UseIncDec is on (no UseAPX condition); clobbers
        // the flags register.
10088   predicate(UseIncDec);
10089   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10090   effect(KILL cr);
10091
10092   ins_cost(125); // XXX
10093   format %{ "incl    $dst\t# int" %}
10094   ins_encode %{
10095     __ incrementl($dst$$Address);
10096   %}
10097   ins_pipe(ialu_mem_imm);
10098 %}
10099 
10100 // XXX why does that use AddI
10101 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10102 %{
        // Int decrement of a register, matched as AddI of dst with the
        // constant -1 (immI_M1) and emitted through decrementl. Selected
        // only when UseAPX is off and UseIncDec is on; clobbers the flags
        // register.
10103   predicate(!UseAPX && UseIncDec);
10104   match(Set dst (AddI dst src));
10105   effect(KILL cr);
10106
10107   format %{ "decl    $dst\t# int" %}
10108   ins_encode %{
10109     __ decrementl($dst$$Register);
10110   %}
10111   ins_pipe(ialu_reg);
10112 %}
10113 
10114 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10115 %{
        // Int decrement in two-register (ndd) form: dst = src + (-1) via
        // edecl. Selected only when both UseAPX and UseIncDec are on;
        // clobbers the flags register. The source register (operand 1) is
        // tagged Flag_ndd_demotable.
10116   predicate(UseAPX && UseIncDec);
10117   match(Set dst (AddI src val));
10118   effect(KILL cr);
10119   flag(PD::Flag_ndd_demotable_opr1);
10120
10121   format %{ "edecl    $dst, $src\t# int ndd" %}
10122   ins_encode %{
10123     __ edecl($dst$$Register, $src$$Register, false);
10124   %}
10125   ins_pipe(ialu_reg);
10126 %}
10127 
10128 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10129 %{
        // Int decrement in ndd form with the load folded in:
        // dst = [src] + (-1) via edecl. Selected only when both UseAPX and
        // UseIncDec are on; clobbers the flags register.
10130   predicate(UseAPX && UseIncDec);
10131   match(Set dst (AddI (LoadI src) val));
10132   effect(KILL cr);
10133
10134   format %{ "edecl    $dst, $src\t# int ndd" %}
10135   ins_encode %{
10136     __ edecl($dst$$Register, $src$$Address, false);
10137   %}
10138   ins_pipe(ialu_reg);
10139 %}
10140 
// XXX Memory decrement is matched as AddI of the loaded value with an immI_M1 (minus one) constant operand, not as SubI.
// In-place int decrement of a memory location.
// Matches a StoreI to $dst of (LoadI $dst + immI_M1) and emits a single
// decrementl on that address. Enabled by UseIncDec.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);  // condition codes are declared clobbered
  ins_cost(125); // XXX

  format %{ "decl    $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10155 
// Int (index << scale) + disp computed with one leal (no base register).
// Only selected when VM_Version::supports_fast_2op_lea() holds. No flags
// register effect is declared for this rule.
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the address scale factor.
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address operand(noreg, $index$$Register, factor, $disp$$constant);
    __ leal($dst$$Register, operand);
  %}
  ins_pipe(ialu_reg_reg);
%}
10168 
// Int base + index + disp computed with one leal.
// Only selected when VM_Version::supports_fast_3op_lea() holds. No flags
// register effect is declared for this rule.
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    Address operand($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
    __ leal($dst$$Register, operand);
  %}
  ins_pipe(ialu_reg_reg);
%}
10180 
// Int base + (index << scale) computed with one leal (no displacement).
// Only selected when VM_Version::supports_fast_2op_lea() holds; the base is
// restricted to the no_rbp_r13_RegI operand class.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the address scale factor.
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address operand($base$$Register, $index$$Register, factor);
    __ leal($dst$$Register, operand);
  %}
  ins_pipe(ialu_reg_reg);
%}
10193 
// Int base + (index << scale) + disp computed with one leal.
// Only selected when VM_Version::supports_fast_3op_lea() holds.
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the address scale factor.
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address operand($base$$Register, $index$$Register, factor, $disp$$constant);
    __ leal($dst$$Register, operand);
  %}
  ins_pipe(ialu_reg_reg);
%}
10206 
// Two-operand long register add (non-APX path): dst = dst + src via addq.
// The flag() list records which condition-code bits the instruction sets.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    Register accum = $dst$$Register;
    Register addend = $src$$Register;
    __ addq(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
10220 
// Three-operand "ndd" long register add (UseAPX path): dst = src1 + src2 via
// eaddq. Tagged as demotable on either source operand.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    __ eaddq(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10234 
// Two-operand long add of an immL32 constant to a register (non-APX path):
// dst = dst + src via addq.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    Register accum = $dst$$Register;
    __ addq(accum, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10248 
// Three-operand "ndd" long add of a register and an immL32 constant
// (UseAPX path): dst = src1 + src2 via eaddq. Demotable on the first operand.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs = $src1$$Register;
    __ eaddq(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
10262 
// Three-operand "ndd" long add of a loaded value and an immL32 constant
// (UseAPX path): dst = [src1] + src2 via eaddq with a memory first source.
instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ eaddq(result, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
10276 
// Two-operand long add of a loaded value into a register (non-APX path):
// dst = dst + [src] via addq with a memory source.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150); // XXX

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    Register accum = $dst$$Register;
    __ addq(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10291 
// Three-operand "ndd" long add of a register and a loaded value
// (UseAPX path): dst = src1 + [src2] via eaddq.
instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  ins_cost(150);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs = $src1$$Register;
    __ eaddq(result, lhs, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10306 
// In-place long add of a register into memory: [dst] = [dst] + src.
// Matches the StoreL/AddL/LoadL tree on one address and emits a single addq
// with a memory destination. No predicate: available with or without APX.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(150); // XXX

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    Register addend = $src$$Register;
    __ addq($dst$$Address, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
10320 
// In-place long add of an immL32 constant into memory: [dst] = [dst] + src.
// Matches the StoreL/AddL/LoadL tree on one address and emits a single addq
// with a memory destination. No predicate: available with or without APX.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  ins_cost(125); // XXX

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10334 
// Two-operand long increment of a register (non-APX path).
// Matches AddL of $dst with the immL1 operand and emits incrementq on $dst.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);  // condition codes are declared clobbered

  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    Register value = $dst$$Register;
    __ incrementq(value);
  %}
  ins_pipe(ialu_reg);
%}
10347 
// Long increment, three-operand "ndd" form with separate source and
// destination. Selected when UseAPX and UseIncDec are set; matches AddL of
// $src with the immL1 operand and emits eincq. Tagged Flag_ndd_demotable_opr1.
//
// $src is a long register operand (rRegL): it is an input of AddL and is
// consumed by the 64-bit eincq, matching the sibling rule decL_rReg_ndd.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);  // condition codes are declared clobbered
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10361 
// Long increment with a memory source, three-operand "ndd" form.
// Selected when UseAPX and UseIncDec are set; matches AddL of a LoadL from
// $src with the immL1 operand and emits eincq into $dst.
instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);  // condition codes are declared clobbered

  format %{ "eincq    $dst, $src\t# long ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ eincq(result, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10374 
// In-place long increment of a memory location.
// Matches a StoreL to $dst of (LoadL $dst + immL1) and emits a single
// incrementq on that address. Enabled by UseIncDec.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);  // condition codes are declared clobbered
  ins_cost(125); // XXX

  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10388 
// XXX Decrement is matched as AddL with an immL_M1 (minus one) constant operand, not as SubL.
// Two-operand long decrement of a register (non-APX path).
// Matches AddL of $dst with the immL_M1 operand and emits decrementq on $dst.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);  // condition codes are declared clobbered

  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    Register value = $dst$$Register;
    __ decrementq(value);
  %}
  ins_pipe(ialu_reg);
%}
10402 
// Long decrement, three-operand "ndd" form with separate source and
// destination. Selected when UseAPX and UseIncDec are set; matches AddL of
// $src with the immL_M1 operand and emits edecq. Tagged Flag_ndd_demotable_opr1.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);  // condition codes are declared clobbered
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq    $dst, $src\t# long ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input = $src$$Register;
    __ edecq(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10416 
// Long decrement with a memory source, three-operand "ndd" form.
// Selected when UseAPX and UseIncDec are set; matches AddL of a LoadL from
// $src with the immL_M1 operand and emits edecq into $dst.
instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);  // condition codes are declared clobbered

  format %{ "edecq    $dst, $src\t# long ndd" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ edecq(result, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10429 
// XXX Memory decrement is matched as AddL of the loaded value with an immL_M1 (minus one) constant operand, not as SubL.
// In-place long decrement of a memory location.
// Matches a StoreL to $dst of (LoadL $dst + immL_M1) and emits a single
// decrementq on that address. Enabled by UseIncDec.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);  // condition codes are declared clobbered
  ins_cost(125); // XXX

  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10444 
// Long (index << scale) + disp computed with one leaq (no base register).
// Only selected when VM_Version::supports_fast_2op_lea() holds. No flags
// register effect is declared for this rule.
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the address scale factor.
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address operand(noreg, $index$$Register, factor, $disp$$constant);
    __ leaq($dst$$Register, operand);
  %}
  ins_pipe(ialu_reg_reg);
%}
10457 
// Long base + index + disp computed with one leaq.
// Only selected when VM_Version::supports_fast_3op_lea() holds. No flags
// register effect is declared for this rule.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    Address operand($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
    __ leaq($dst$$Register, operand);
  %}
  ins_pipe(ialu_reg_reg);
%}
10469 
// Long base + (index << scale) computed with one leaq (no displacement).
// Only selected when VM_Version::supports_fast_2op_lea() holds; the base is
// restricted to the no_rbp_r13_RegL operand class.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale) %{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the address scale factor.
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address operand($base$$Register, $index$$Register, factor);
    __ leaq($dst$$Register, operand);
  %}
  ins_pipe(ialu_reg_reg);
%}
10482 
// Long base + (index << scale) + disp computed with one leaq.
// Only selected when VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp) %{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift amount constant is reinterpreted as the address scale factor.
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address operand($base$$Register, $index$$Register, factor, $disp$$constant);
    __ leaq($dst$$Register, operand);
  %}
  ins_pipe(ialu_reg_reg);
%}
10495 
// Pointer plus long register offset: dst = dst + src via addq.
// No predicate is attached to this rule.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    Register pointer = $dst$$Register;
    Register offset = $src$$Register;
    __ addq(pointer, offset);
  %}
  ins_pipe(ialu_reg_reg);
%}
10508 
// Pointer plus immL32 constant offset: dst = dst + src via addq.
// No predicate is attached to this rule.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    Register pointer = $dst$$Register;
    __ addq(pointer, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10521 
10522 // XXX addP mem ops ????
10523 
// CheckCastPP on a pointer register: declared with size 0 and an empty
// encoding, so no machine code is emitted for it.
instruct checkCastPP(rRegP dst) %{
  match(Set dst (CheckCastPP dst));

  format %{ "# checkcastPP of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10533 
// CastPP on a pointer register: declared with size 0 and an empty encoding,
// so no machine code is emitted for it.
instruct castPP(rRegP dst) %{
  match(Set dst (CastPP dst));

  format %{ "# castPP of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10543 
// CastII when constraint-cast verification is off (VerifyConstraintCasts == 0):
// zero size, zero cost, empty encoding.
instruct castII(rRegI dst) %{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  format %{ "# castII of $dst" %}
  size(0);
  ins_cost(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10555 
// CastII when constraint-cast verification is on (VerifyConstraintCasts > 0):
// emits verify_int_in_range for $dst against this node's int bottom type.
instruct castII_checked(rRegI dst, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));
  effect(KILL cr);  // the check is declared to clobber condition codes

  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    Register value = $dst$$Register;
    __ verify_int_in_range(_idx, bottom_type()->is_int(), value);
  %}
  ins_pipe(pipe_slow);
%}
10568 
// CastLL when constraint-cast verification is off (VerifyConstraintCasts == 0):
// zero size, zero cost, empty encoding.
instruct castLL(rRegL dst) %{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastLL dst));

  format %{ "# castLL of $dst" %}
  size(0);
  ins_cost(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10580 
// Checked CastLL for nodes where castLL_is_imm32(n) holds: emits
// verify_long_in_range for $dst with noreg passed as the last argument, so
// this variant declares no TEMP register.
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));
  effect(KILL cr);  // the check is declared to clobber condition codes

  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    Register value = $dst$$Register;
    __ verify_long_in_range(_idx, bottom_type()->is_long(), value, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10593 
// Checked CastLL for nodes where castLL_is_imm32(n) does not hold: emits
// verify_long_in_range for $dst and hands it $tmp, which is reserved as TEMP.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr) %{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));
  effect(KILL cr, TEMP tmp);

  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    Register value = $dst$$Register;
    Register scratch = $tmp$$Register;
    __ verify_long_in_range(_idx, bottom_type()->is_long(), value, scratch);
  %}
  ins_pipe(pipe_slow);
%}
10606 
// CastFF on a float register: zero size, zero cost, empty encoding.
instruct castFF(regF dst) %{
  match(Set dst (CastFF dst));

  format %{ "# castFF of $dst" %}
  size(0);
  ins_cost(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10617 
// CastHH on a regF register: zero size, zero cost, empty encoding.
instruct castHH(regF dst) %{
  match(Set dst (CastHH dst));

  format %{ "# castHH of $dst" %}
  size(0);
  ins_cost(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10628 
// CastDD on a double register: zero size, zero cost, empty encoding.
instruct castDD(regD dst) %{
  match(Set dst (CastDD dst));

  format %{ "# castDD of $dst" %}
  size(0);
  ins_cost(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10639 
10640 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing a success flag in $res.
// Handles both CompareAndSwapP and WeakCompareAndSwapP, only for nodes whose
// barrier_data() is 0. $oldval is pinned to the rax_RegP class and is
// declared killed; the result comes from setcc(equal) after the cmpxchgq.
instruct compareAndSwapP(rRegI res, memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register replacement = $newval$$Register;
    Register outcome = $res$$Register;
    __ lock();
    __ cmpxchgq(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, outcome);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10661 
// Long compare-and-swap producing a success flag in $res.
// Handles both CompareAndSwapL and WeakCompareAndSwapL. $oldval is pinned to
// the rax_RegL class and is declared killed; the result comes from
// setcc(equal) after the lock-prefixed cmpxchgq.
instruct compareAndSwapL(rRegI res, memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register replacement = $newval$$Register;
    Register outcome = $res$$Register;
    __ lock();
    __ cmpxchgq(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, outcome);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10681 
// Int compare-and-swap producing a success flag in $res.
// Handles both CompareAndSwapI and WeakCompareAndSwapI. $oldval is pinned to
// the rax_RegI class and is declared killed; the result comes from
// setcc(equal) after the lock-prefixed cmpxchgl.
instruct compareAndSwapI(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register replacement = $newval$$Register;
    Register outcome = $res$$Register;
    __ lock();
    __ cmpxchgl(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, outcome);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10701 
// Byte compare-and-swap producing a success flag in $res.
// Handles both CompareAndSwapB and WeakCompareAndSwapB. $oldval is pinned to
// the rax_RegI class and is declared killed; the result comes from
// setcc(equal) after the lock-prefixed cmpxchgb.
instruct compareAndSwapB(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register replacement = $newval$$Register;
    Register outcome = $res$$Register;
    __ lock();
    __ cmpxchgb(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, outcome);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10721 
// Short compare-and-swap producing a success flag in $res.
// Handles both CompareAndSwapS and WeakCompareAndSwapS. $oldval is pinned to
// the rax_RegI class and is declared killed; the result comes from
// setcc(equal) after the lock-prefixed cmpxchgw.
instruct compareAndSwapS(rRegI res, memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register replacement = $newval$$Register;
    Register outcome = $res$$Register;
    __ lock();
    __ cmpxchgw(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, outcome);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10741 
// Narrow-oop compare-and-swap producing a success flag in $res.
// Handles both CompareAndSwapN and WeakCompareAndSwapN, only for nodes whose
// barrier_data() is 0. $oldval is pinned to the rax_RegN class and is
// declared killed; the result comes from setcc(equal) after the cmpxchgl.
instruct compareAndSwapN(rRegI res, memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    Register replacement = $newval$$Register;
    Register outcome = $res$$Register;
    __ lock();
    __ cmpxchgl(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, outcome);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10761 
// Byte compare-and-exchange: the rule's result is $oldval itself (rax_RegI
// class), so only the lock-prefixed cmpxchgb is emitted and no setcc follows.
instruct compareAndExchangeB(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgb(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10778 
// Short compare-and-exchange: the rule's result is $oldval itself (rax_RegI
// class), so only the lock-prefixed cmpxchgw is emitted and no setcc follows.
instruct compareAndExchangeS(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgw(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10795 
// Int compare-and-exchange: the rule's result is $oldval itself (rax_RegI
// class), so only the lock-prefixed cmpxchgl is emitted and no setcc follows.
instruct compareAndExchangeI(memory mem_ptr, rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgl(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10812 
// Long compare-and-exchange: the rule's result is $oldval itself (rax_RegL
// class), so only the lock-prefixed cmpxchgq is emitted and no setcc follows.
instruct compareAndExchangeL(memory mem_ptr, rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgq(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10829 
// Narrow-oop compare-and-exchange, only for nodes whose barrier_data() is 0.
// The rule's result is $oldval itself (rax_RegN class), so only the
// lock-prefixed cmpxchgl is emitted.
instruct compareAndExchangeN(memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgl(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10846 
// Pointer compare-and-exchange, only for nodes whose barrier_data() is 0.
// The rule's result is $oldval itself (rax_RegP class), so only the
// lock-prefixed cmpxchgq is emitted.
instruct compareAndExchangeP(memory mem_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    Register replacement = $newval$$Register;
    __ lock();
    __ cmpxchgq(replacement, $mem_ptr$$Address);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10864 
// GetAndAddB with a register addend whose result is not used
// (result_not_used()): emitted as a lock-prefixed addb to memory.
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);

  format %{ "addb_lock   $mem, $add" %}
  ins_encode %{
    Register addend = $add$$Register;
    __ lock();
    __ addb($mem$$Address, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10876 
// GetAndAddB with an immediate addend whose result is not used
// (result_not_used()): emitted as a lock-prefixed addb of the constant.
instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);

  format %{ "addb_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10888 
// GetAndAddB whose result is used: lock-prefixed xaddb with $newval as both
// the addend and the result register, followed by narrow_subword_type(T_BYTE)
// on that register.
instruct xaddB(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);

  format %{ "xaddb_lock  $mem, $newval\t# $newval -> byte" %}
  ins_encode %{
    Register value = $newval$$Register;
    __ lock();
    __ xaddb($mem$$Address, value);
    __ narrow_subword_type(value, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10901 
// GetAndAddS with a register addend whose result is not used
// (result_not_used()): emitted as a lock-prefixed addw to memory.
instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);

  format %{ "addw_lock   $mem, $add" %}
  ins_encode %{
    Register addend = $add$$Register;
    __ lock();
    __ addw($mem$$Address, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10913 
// GetAndAddS with an immediate addend whose result is not used: emitted as a
// lock-prefixed addw of the constant. Additionally gated on UseStoreImmI16.
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);

  format %{ "addw_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10925 
// GetAndAddS whose result is used: lock-prefixed xaddw with $newval as both
// the addend and the result register, followed by narrow_subword_type(T_SHORT)
// on that register.
instruct xaddS(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);

  format %{ "xaddw_lock  $mem, $newval\t# $newval -> short" %}
  ins_encode %{
    Register value = $newval$$Register;
    __ lock();
    __ xaddw($mem$$Address, value);
    __ narrow_subword_type(value, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10938 
// GetAndAddI with a register addend whose result is not used
// (result_not_used()): emitted as a lock-prefixed addl to memory.
instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);

  format %{ "addl_lock   $mem, $add" %}
  ins_encode %{
    Register addend = $add$$Register;
    __ lock();
    __ addl($mem$$Address, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10950 
// GetAndAddI with an immediate addend whose result is not used
// (result_not_used()): emitted as a lock-prefixed addl of the constant.
instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);

  format %{ "addl_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10962 
// GetAndAddI whose result is used: lock-prefixed xaddl with $newval as both
// the addend and the result register.
instruct xaddI(memory mem, rRegI newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);

  format %{ "xaddl_lock  $mem, $newval" %}
  ins_encode %{
    Register value = $newval$$Register;
    __ lock();
    __ xaddl($mem$$Address, value);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10974 
// GetAndAddL with a register addend whose result is not used
// (result_not_used()): emitted as a lock-prefixed addq to memory.
instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);

  format %{ "addq_lock   $mem, $add" %}
  ins_encode %{
    Register addend = $add$$Register;
    __ lock();
    __ addq($mem$$Address, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10986 
// GetAndAddL with an immL32 addend whose result is not used
// (result_not_used()): emitted as a lock-prefixed addq of the constant.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);

  format %{ "addq_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10998 
// GetAndAddL whose result is used: lock-prefixed xaddq with $newval as both
// the addend and the result register.
instruct xaddL(memory mem, rRegL newval, rFlagsReg cr)
%{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);

  format %{ "xaddq_lock  $mem, $newval" %}
  ins_encode %{
    Register value = $newval$$Register;
    __ lock();
    __ xaddq($mem$$Address, value);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11010 
// GetAndSetB: xchgb between $newval and memory, then
// narrow_subword_type(T_BYTE) on the register that now holds the old value.
// No flags effect is declared and no explicit lock() is emitted here.
instruct xchgB(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetB mem newval));

  format %{ "XCHGB  $newval,[$mem]\t# $newval -> byte" %}
  ins_encode %{
    Register value = $newval$$Register;
    __ xchgb(value, $mem$$Address);
    __ narrow_subword_type(value, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11020 
// GetAndSetS: xchgw between $newval and memory, then
// narrow_subword_type(T_SHORT) on the register that now holds the old value.
// No flags effect is declared and no explicit lock() is emitted here.
instruct xchgS(memory mem, rRegI newval)
%{
  match(Set newval (GetAndSetS mem newval));

  format %{ "XCHGW  $newval,[$mem]\t# $newval -> short" %}
  ins_encode %{
    Register value = $newval$$Register;
    __ xchgw(value, $mem$$Address);
    __ narrow_subword_type(value, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11030 
11031 instruct xchgI( memory mem, rRegI newval) %{
        // GetAndSetI: 32-bit exchange of $newval with $mem; $newval receives the
        // previous memory value. No flags operand is declared for this rule.
11032   match(Set newval (GetAndSetI mem newval));
11033   format %{ "XCHGL  $newval,[$mem]" %}
11034   ins_encode %{
11035     __ xchgl($newval$$Register, $mem$$Address);
11036   %}
11037   ins_pipe( pipe_cmpxchg );
11038 %}
11039 
11040 instruct xchgL( memory mem, rRegL newval) %{
        // GetAndSetL: 64-bit exchange of $newval with $mem; $newval receives the
        // previous memory value. The format text names the 64-bit mnemonic so the
        // printed assembly agrees with the xchgq that is actually encoded.
11041   match(Set newval (GetAndSetL mem newval));
11042   format %{ "XCHGQ  $newval,[$mem]" %}
11043   ins_encode %{
11044     __ xchgq($newval$$Register, $mem$$Address);
11045   %}
11046   ins_pipe( pipe_cmpxchg );
11047 %}
11048 
11049 instruct xchgP( memory mem, rRegP newval) %{
        // GetAndSetP: 64-bit exchange of a pointer with $mem. Only selected when
        // the node carries no GC barrier data (barrier_data() == 0); nodes with
        // barrier data are presumably handled by GC-specific rules elsewhere.
11050   match(Set newval (GetAndSetP mem newval));
11051   predicate(n->as_LoadStore()->barrier_data() == 0);
11052   format %{ "XCHGQ  $newval,[$mem]" %}
11053   ins_encode %{
11054     __ xchgq($newval$$Register, $mem$$Address);
11055   %}
11056   ins_pipe( pipe_cmpxchg );
11057 %}
11058 
11059 instruct xchgN( memory mem, rRegN newval) %{
        // GetAndSetN: 32-bit exchange of a narrow (compressed) pointer with $mem.
        // Only selected when the node carries no GC barrier data.
        // The format text uses the bracketed "[$mem]" form like the other xchg
        // rules (the opening bracket was previously missing).
11060   predicate(n->as_LoadStore()->barrier_data() == 0);
11061   match(Set newval (GetAndSetN mem newval));
11062   format %{ "XCHGL  $newval,[$mem]" %}
11063   ins_encode %{
11064     __ xchgl($newval$$Register, $mem$$Address);
11065   %}
11066   ins_pipe( pipe_cmpxchg );
11067 %}
11068 
11069 //----------Abs Instructions-------------------------------------------
11070 
11071 // Integer Absolute Instructions
11072 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11073 %{
        // AbsI without a branch: dst = 0 - src, then cmov replaces dst with src
        // when the subtraction compared "less" (0 < src), so dst ends up as
        // src for positive inputs and -src otherwise.
        // dst is a TEMP because it is written (zeroed) before src is read, so
        // it presumably must not be allocated to the same register as src.
11074   match(Set dst (AbsI src));
11075   effect(TEMP dst, KILL cr);
11076   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11077             "subl    $dst, $src\n\t"
11078             "cmovll  $dst, $src" %}
11079   ins_encode %{
11080     __ xorl($dst$$Register, $dst$$Register);
11081     __ subl($dst$$Register, $src$$Register);
11082     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11083   %}
11084 
11085   ins_pipe(ialu_reg_reg);
11086 %}
11087 
11088 // Long Absolute Instructions
11089 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11090 %{
        // AbsL without a branch: dst = 0 - src (64-bit), then cmovq replaces dst
        // with src when the subtraction compared "less" (0 < src).
        // dst is a TEMP because it is zeroed before src is read.
11091   match(Set dst (AbsL src));
11092   effect(TEMP dst, KILL cr);
11093   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11094             "subq    $dst, $src\n\t"
11095             "cmovlq  $dst, $src" %}
11096   ins_encode %{
          // 32-bit xor is used to zero the 64-bit register; on x86-64 a 32-bit
          // register write zero-extends into the upper half.
11097     __ xorl($dst$$Register, $dst$$Register);
11098     __ subq($dst$$Register, $src$$Register);
11099     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11100   %}
11101 
11102   ins_pipe(ialu_reg_reg);
11103 %}
11104 
11105 //----------Subtraction Instructions-------------------------------------------
11106 
11107 // Integer Subtraction Instructions
11108 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11109 %{
        // Two-operand int subtract, dst -= src. Used only when UseAPX is off;
        // the *_ndd rule covers the APX case.
        // flag(...) records that OF/SF/ZF/CF/PF are all set by this instruction.
11110   predicate(!UseAPX);
11111   match(Set dst (SubI dst src));
11112   effect(KILL cr);
11113   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11114 
11115   format %{ "subl    $dst, $src\t# int" %}
11116   ins_encode %{
11117     __ subl($dst$$Register, $src$$Register);
11118   %}
11119   ins_pipe(ialu_reg_reg);
11120 %}
11121 
11122 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11123 %{
        // APX-only (UseAPX) three-operand int subtract: dst = src1 - src2 with a
        // destination distinct from both sources.
        // Flag_ndd_demotable_opr1 presumably allows falling back to the
        // two-operand encoding when dst and src1 share a register - confirm.
        // NOTE(review): the trailing `false` is presumably the no-flags selector.
11124   predicate(UseAPX);
11125   match(Set dst (SubI src1 src2));
11126   effect(KILL cr);
11127   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11128 
11129   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11130   ins_encode %{
11131     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11132   %}
11133   ins_pipe(ialu_reg_reg);
11134 %}
11135 
11136 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11137 %{
        // APX-only three-operand int subtract of an immediate:
        // dst = src1 - constant. Operand 1 is marked NDD-demotable.
11138   predicate(UseAPX);
11139   match(Set dst (SubI src1 src2));
11140   effect(KILL cr);
11141   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11142 
11143   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11144   ins_encode %{
11145     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11146   %}
11147   ins_pipe(ialu_reg_reg);
11148 %}
11149 
11150 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11151 %{
        // APX-only int subtract that folds the load: dst = [src1] - constant.
        // No demotable-operand flag is listed, since the first source is memory.
        // NOTE(review): unlike the other memory-source sub rules this one has no
        // ins_cost and uses ialu_reg_reg rather than ialu_reg_mem - confirm intent.
11152   predicate(UseAPX);
11153   match(Set dst (SubI (LoadI src1) src2));
11154   effect(KILL cr);
11155   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11156 
11157   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11158   ins_encode %{
11159     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11160   %}
11161   ins_pipe(ialu_reg_reg);
11162 %}
11163 
11164 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11165 %{
        // Non-APX int subtract with the load folded in: dst -= [src].
        // Costed at 150.
11166   predicate(!UseAPX);
11167   match(Set dst (SubI dst (LoadI src)));
11168   effect(KILL cr);
11169   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11170 
11171   ins_cost(150);
11172   format %{ "subl    $dst, $src\t# int" %}
11173   ins_encode %{
11174     __ subl($dst$$Register, $src$$Address);
11175   %}
11176   ins_pipe(ialu_reg_mem);
11177 %}
11178 
11179 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11180 %{
        // APX-only three-operand int subtract with the subtrahend loaded from
        // memory: dst = src1 - [src2]. Operand 1 is marked NDD-demotable.
11181   predicate(UseAPX);
11182   match(Set dst (SubI src1 (LoadI src2)));
11183   effect(KILL cr);
11184   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11185 
11186   ins_cost(150);
11187   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11188   ins_encode %{
11189     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11190   %}
11191   ins_pipe(ialu_reg_mem);
11192 %}
11193 
11194 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11195 %{
        // APX-only three-operand int subtract with the minuend loaded from
        // memory: dst = [src1] - src2. No demotable-operand flag is listed.
11196   predicate(UseAPX);
11197   match(Set dst (SubI (LoadI src1) src2));
11198   effect(KILL cr);
11199   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11200 
11201   ins_cost(150);
11202   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11203   ins_encode %{
11204     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11205   %}
11206   ins_pipe(ialu_reg_mem);
11207 %}
11208 
11209 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11210 %{
        // Read-modify-write int subtract on memory: the load / SubI / StoreI
        // chain on the same address collapses into one `subl [dst], src`.
        // No UseAPX predicate, so this rule applies in both modes.
11211   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11212   effect(KILL cr);
11213   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11214 
11215   ins_cost(150);
11216   format %{ "subl    $dst, $src\t# int" %}
11217   ins_encode %{
11218     __ subl($dst$$Address, $src$$Register);
11219   %}
11220   ins_pipe(ialu_mem_reg);
11221 %}
11222 
11223 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11224 %{
        // Two-operand long subtract, dst -= src. Used only when UseAPX is off.
        // flag(...) records that OF/SF/ZF/CF/PF are all set by this instruction.
11225   predicate(!UseAPX);
11226   match(Set dst (SubL dst src));
11227   effect(KILL cr);
11228   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11229 
11230   format %{ "subq    $dst, $src\t# long" %}
11231   ins_encode %{
11232     __ subq($dst$$Register, $src$$Register);
11233   %}
11234   ins_pipe(ialu_reg_reg);
11235 %}
11236 
11237 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11238 %{
        // APX-only (UseAPX) three-operand long subtract: dst = src1 - src2.
        // Flag_ndd_demotable_opr1 presumably allows falling back to the
        // two-operand encoding when dst and src1 share a register - confirm.
11239   predicate(UseAPX);
11240   match(Set dst (SubL src1 src2));
11241   effect(KILL cr);
11242   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11243 
11244   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11245   ins_encode %{
11246     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11247   %}
11248   ins_pipe(ialu_reg_reg);
11249 %}
11250 
11251 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11252 %{
        // APX-only three-operand long subtract of an immL32 constant:
        // dst = src1 - constant. Operand 1 is marked NDD-demotable.
11253   predicate(UseAPX);
11254   match(Set dst (SubL src1 src2));
11255   effect(KILL cr);
11256   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11257 
11258   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11259   ins_encode %{
11260     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11261   %}
11262   ins_pipe(ialu_reg_reg);
11263 %}
11264 
11265 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11266 %{
        // APX-only long subtract that folds the load: dst = [src1] - constant.
        // NOTE(review): unlike the other memory-source sub rules this one has no
        // ins_cost and uses ialu_reg_reg rather than ialu_reg_mem - confirm intent.
11267   predicate(UseAPX);
11268   match(Set dst (SubL (LoadL src1) src2));
11269   effect(KILL cr);
11270   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11271 
11272   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11273   ins_encode %{
11274     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11275   %}
11276   ins_pipe(ialu_reg_reg);
11277 %}
11278 
11279 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11280 %{
        // Non-APX long subtract with the load folded in: dst -= [src].
        // Costed at 150.
11281   predicate(!UseAPX);
11282   match(Set dst (SubL dst (LoadL src)));
11283   effect(KILL cr);
11284   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11285 
11286   ins_cost(150);
11287   format %{ "subq    $dst, $src\t# long" %}
11288   ins_encode %{
11289     __ subq($dst$$Register, $src$$Address);
11290   %}
11291   ins_pipe(ialu_reg_mem);
11292 %}
11293 
11294 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11295 %{
        // APX-only three-operand long subtract with the subtrahend loaded from
        // memory: dst = src1 - [src2]. Operand 1 is marked NDD-demotable.
11296   predicate(UseAPX);
11297   match(Set dst (SubL src1 (LoadL src2)));
11298   effect(KILL cr);
11299   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11300 
11301   ins_cost(150);
11302   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11303   ins_encode %{
11304     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11305   %}
11306   ins_pipe(ialu_reg_mem);
11307 %}
11308 
11309 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11310 %{
        // APX-only three-operand long subtract with the minuend loaded from
        // memory: dst = [src1] - src2. No demotable-operand flag is listed.
11311   predicate(UseAPX);
11312   match(Set dst (SubL (LoadL src1) src2));
11313   effect(KILL cr);
11314   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11315 
11316   ins_cost(150);
11317   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11318   ins_encode %{
11319     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11320   %}
11321   ins_pipe(ialu_reg_mem);
11322 %}
11323 
11324 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11325 %{
        // Read-modify-write long subtract on memory: the load / SubL / StoreL
        // chain on the same address collapses into one `subq [dst], src`.
        // No UseAPX predicate, so this rule applies in both modes.
11326   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11327   effect(KILL cr);
11328   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11329 
11330   ins_cost(150);
11331   format %{ "subq    $dst, $src\t# long" %}
11332   ins_encode %{
11333     __ subq($dst$$Address, $src$$Register);
11334   %}
11335   ins_pipe(ialu_mem_reg);
11336 %}
11337 
11338 // Subtract from a pointer
11339 // XXX hmpf???
11340 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11341 %{
        // Matches "pointer + (0 - int)" and emits a single 64-bit subtract,
        // dst -= src, instead of a negate followed by an add.
        // NOTE(review): src is an rRegI consumed as a full 64-bit register by
        // subq; whether its upper 32 bits are guaranteed to be a sign extension
        // is not visible here - confirm before relying on this rule.
11342   match(Set dst (AddP dst (SubI zero src)));
11343   effect(KILL cr);
11344 
11345   format %{ "subq    $dst, $src\t# ptr - int" %}
11346   ins_encode %{
11347     __ subq($dst$$Register, $src$$Register);
11348   %}
11349   ins_pipe(ialu_reg_reg);
11350 %}
11351 
11352 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11353 %{
        // Non-APX int negate expressed in the ideal graph as (0 - dst); emitted as
        // an in-place negl. The flag list covers OF/SF/ZF/PF; the carry flag is
        // not listed, unlike the sub rules.
11354   predicate(!UseAPX);
11355   match(Set dst (SubI zero dst));
11356   effect(KILL cr);
11357   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11358 
11359   format %{ "negl    $dst\t# int" %}
11360   ins_encode %{
11361     __ negl($dst$$Register);
11362   %}
11363   ins_pipe(ialu_reg);
11364 %}
11365 
11366 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11367 %{
        // APX-only int negate expressed as (0 - src): dst = -src with a separate
        // destination. The demotable operand is opr2 here because src is the
        // second input of the matched SubI (the first is the constant zero).
11368   predicate(UseAPX);
11369   match(Set dst (SubI zero src));
11370   effect(KILL cr);
11371   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11372 
11373   format %{ "enegl    $dst, $src\t# int ndd" %}
11374   ins_encode %{
11375     __ enegl($dst$$Register, $src$$Register, false);
11376   %}
11377   ins_pipe(ialu_reg);
11378 %}
11379 
11380 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11381 %{
        // Non-APX int negate for an explicit NegI node (as opposed to the
        // (0 - x) SubI shape handled by negI_rReg); emitted as an in-place negl.
11382   predicate(!UseAPX);
11383   match(Set dst (NegI dst));
11384   effect(KILL cr);
11385   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11386 
11387   format %{ "negl    $dst\t# int" %}
11388   ins_encode %{
11389     __ negl($dst$$Register);
11390   %}
11391   ins_pipe(ialu_reg);
11392 %}
11393 
11394 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11395 %{
        // APX-only int negate for an explicit NegI node: dst = -src with a
        // separate destination. src is the only input, hence demotable opr1.
11396   predicate(UseAPX);
11397   match(Set dst (NegI src));
11398   effect(KILL cr);
11399   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11400 
11401   format %{ "enegl    $dst, $src\t# int ndd" %}
11402   ins_encode %{
11403     __ enegl($dst$$Register, $src$$Register, false);
11404   %}
11405   ins_pipe(ialu_reg);
11406 %}
11407 
11408 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11409 %{
        // Read-modify-write int negate on memory: load, (0 - value), store to the
        // same address collapses into one `negl [dst]`. No UseAPX predicate.
11410   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11411   effect(KILL cr);
11412   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11413 
11414   format %{ "negl    $dst\t# int" %}
11415   ins_encode %{
11416     __ negl($dst$$Address);
11417   %}
11418   ins_pipe(ialu_reg);
11419 %}
11420 
11421 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11422 %{
        // Non-APX long negate expressed in the ideal graph as (0 - dst); emitted
        // as an in-place negq. The flag list covers OF/SF/ZF/PF (no carry flag).
11423   predicate(!UseAPX);
11424   match(Set dst (SubL zero dst));
11425   effect(KILL cr);
11426   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11427 
11428   format %{ "negq    $dst\t# long" %}
11429   ins_encode %{
11430     __ negq($dst$$Register);
11431   %}
11432   ins_pipe(ialu_reg);
11433 %}
11434 
11435 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11436 %{
        // APX-only long negate expressed as (0 - src): dst = -src with a separate
        // destination. The demotable operand is opr2 because src is the second
        // input of the matched SubL (the first is the constant zero).
11437   predicate(UseAPX);
11438   match(Set dst (SubL zero src));
11439   effect(KILL cr);
11440   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11441 
11442   format %{ "enegq    $dst, $src\t# long ndd" %}
11443   ins_encode %{
11444     __ enegq($dst$$Register, $src$$Register, false);
11445   %}
11446   ins_pipe(ialu_reg);
11447 %}
11448 
11449 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11450 %{
        // Non-APX long negate for an explicit NegL node (as opposed to the
        // (0 - x) SubL shape handled by negL_rReg); emitted as an in-place negq.
        // The format annotation reads "# long" to match the 64-bit operand and
        // the other long rules (it previously said "# int").
11451   predicate(!UseAPX);
11452   match(Set dst (NegL dst));
11453   effect(KILL cr);
11454   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11455 
11456   format %{ "negq    $dst\t# long" %}
11457   ins_encode %{
11458     __ negq($dst$$Register);
11459   %}
11460   ins_pipe(ialu_reg);
11461 %}
11462 
11463 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11464 %{
        // APX-only long negate for an explicit NegL node: dst = -src with a
        // separate destination. src is the only input, hence demotable opr1.
11465   predicate(UseAPX);
11466   match(Set dst (NegL src));
11467   effect(KILL cr);
11468   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11469 
11470   format %{ "enegq    $dst, $src\t# long ndd" %}
11471   ins_encode %{
11472     __ enegq($dst$$Register, $src$$Register, false);
11473   %}
11474   ins_pipe(ialu_reg);
11475 %}
11476 
11477 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11478 %{
        // Read-modify-write long negate on memory: load, (0 - value), store to
        // the same address collapses into one `negq [dst]`. No UseAPX predicate.
11479   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11480   effect(KILL cr);
11481   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11482 
11483   format %{ "negq    $dst\t# long" %}
11484   ins_encode %{
11485     __ negq($dst$$Address);
11486   %}
11487   ins_pipe(ialu_reg);
11488 %}
11489 
11490 //----------Multiplication/Division Instructions-------------------------------
11491 // Integer Multiplication Instructions
11492 // Multiply Register
11493 
11494 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11495 %{
        // Non-APX two-operand int multiply, dst *= src. Costed at 300.
        // This rule is also instantiated directly from the mulAddS2I_rReg expand.
11496   predicate(!UseAPX);
11497   match(Set dst (MulI dst src));
11498   effect(KILL cr);
11499 
11500   ins_cost(300);
11501   format %{ "imull   $dst, $src\t# int" %}
11502   ins_encode %{
11503     __ imull($dst$$Register, $src$$Register);
11504   %}
11505   ins_pipe(ialu_reg_reg_alu0);
11506 %}
11507 
11508 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11509 %{
        // APX-only three-operand int multiply: dst = src1 * src2.
        // Both inputs are marked NDD-demotable (opr1 and opr2), which fits
        // multiplication being commutative.
11510   predicate(UseAPX);
11511   match(Set dst (MulI src1 src2));
11512   effect(KILL cr);
11513   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11514 
11515   ins_cost(300);
11516   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11517   ins_encode %{
11518     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11519   %}
11520   ins_pipe(ialu_reg_reg_alu0);
11521 %}
11522 
11523 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11524 %{
        // Int multiply by an immediate using the three-operand imull form:
        // dst = src * constant. No UseAPX predicate, so it applies in both modes.
11525   match(Set dst (MulI src imm));
11526   effect(KILL cr);
11527 
11528   ins_cost(300);
11529   format %{ "imull   $dst, $src, $imm\t# int" %}
11530   ins_encode %{
11531     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11532   %}
11533   ins_pipe(ialu_reg_reg_alu0);
11534 %}
11535 
11536 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11537 %{
        // Non-APX int multiply with the load folded in: dst *= [src].
        // Costed at 350, above the 300 of the register form.
11538   predicate(!UseAPX);
11539   match(Set dst (MulI dst (LoadI src)));
11540   effect(KILL cr);
11541 
11542   ins_cost(350);
11543   format %{ "imull   $dst, $src\t# int" %}
11544   ins_encode %{
11545     __ imull($dst$$Register, $src$$Address);
11546   %}
11547   ins_pipe(ialu_reg_mem_alu0);
11548 %}
11549 
11550 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11551 %{
        // APX-only three-operand int multiply with a memory operand:
        // dst = src1 * [src2].
        // NOTE(review): Flag_ndd_demotable_opr2 is listed although the second
        // input is a memory operand - confirm this is intended.
11552   predicate(UseAPX);
11553   match(Set dst (MulI src1 (LoadI src2)));
11554   effect(KILL cr);
11555   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11556 
11557   ins_cost(350);
11558   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11559   ins_encode %{
11560     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11561   %}
11562   ins_pipe(ialu_reg_mem_alu0);
11563 %}
11564 
11565 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11566 %{
        // Int multiply of a loaded value by an immediate using the three-operand
        // imull form: dst = [src] * constant. Applies in both APX and non-APX mode.
11567   match(Set dst (MulI (LoadI src) imm));
11568   effect(KILL cr);
11569 
11570   ins_cost(300);
11571   format %{ "imull   $dst, $src, $imm\t# int" %}
11572   ins_encode %{
11573     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11574   %}
11575   ins_pipe(ialu_reg_mem_alu0);
11576 %}
11577 
11578 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11579 %{
        // MulAddS2I computed as dst = dst*src1 + src2*src3 by expanding into two
        // mulI_rReg instructions followed by addI_rReg. The second product is
        // built in place in src2, which is why src2 is declared KILLed.
11580   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11581   effect(KILL cr, KILL src2);
11582 
11583   expand %{ mulI_rReg(dst, src1, cr);
11584            mulI_rReg(src2, src3, cr);
11585            addI_rReg(dst, src2, cr); %}
11586 %}
11587 
11588 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11589 %{
        // Non-APX two-operand long multiply, dst *= src. Costed at 300.
11590   predicate(!UseAPX);
11591   match(Set dst (MulL dst src));
11592   effect(KILL cr);
11593 
11594   ins_cost(300);
11595   format %{ "imulq   $dst, $src\t# long" %}
11596   ins_encode %{
11597     __ imulq($dst$$Register, $src$$Register);
11598   %}
11599   ins_pipe(ialu_reg_reg_alu0);
11600 %}
11601 
11602 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11603 %{
        // APX-only three-operand long multiply: dst = src1 * src2.
        // Both inputs are marked NDD-demotable (opr1 and opr2), which fits
        // multiplication being commutative.
11604   predicate(UseAPX);
11605   match(Set dst (MulL src1 src2));
11606   effect(KILL cr);
11607   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11608 
11609   ins_cost(300);
11610   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11611   ins_encode %{
11612     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11613   %}
11614   ins_pipe(ialu_reg_reg_alu0);
11615 %}
11616 
11617 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11618 %{
        // Long multiply by an immL32 constant using the three-operand imulq form:
        // dst = src * constant. Applies in both APX and non-APX mode.
11619   match(Set dst (MulL src imm));
11620   effect(KILL cr);
11621 
11622   ins_cost(300);
11623   format %{ "imulq   $dst, $src, $imm\t# long" %}
11624   ins_encode %{
11625     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11626   %}
11627   ins_pipe(ialu_reg_reg_alu0);
11628 %}
11629 
11630 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11631 %{
        // Non-APX long multiply with the load folded in: dst *= [src].
        // Costed at 350, above the 300 of the register form.
11632   predicate(!UseAPX);
11633   match(Set dst (MulL dst (LoadL src)));
11634   effect(KILL cr);
11635 
11636   ins_cost(350);
11637   format %{ "imulq   $dst, $src\t# long" %}
11638   ins_encode %{
11639     __ imulq($dst$$Register, $src$$Address);
11640   %}
11641   ins_pipe(ialu_reg_mem_alu0);
11642 %}
11643 
11644 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11645 %{
        // APX-only three-operand long multiply with a memory operand:
        // dst = src1 * [src2].
        // The format text carries the "long ndd" tag and no stray space before
        // the tab, matching the other NDD rules in this file.
        // NOTE(review): Flag_ndd_demotable_opr2 is listed although the second
        // input is a memory operand - confirm this is intended.
11646   predicate(UseAPX);
11647   match(Set dst (MulL src1 (LoadL src2)));
11648   effect(KILL cr);
11649   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11650 
11651   ins_cost(350);
11652   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11653   ins_encode %{
11654     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11655   %}
11656   ins_pipe(ialu_reg_mem_alu0);
11657 %}
11658 
11659 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11660 %{
        // Long multiply of a loaded value by an immL32 constant using the
        // three-operand imulq form: dst = [src] * constant.
11661   match(Set dst (MulL (LoadL src) imm));
11662   effect(KILL cr);
11663 
11664   ins_cost(300);
11665   format %{ "imulq   $dst, $src, $imm\t# long" %}
11666   ins_encode %{
11667     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11668   %}
11669   ins_pipe(ialu_reg_mem_alu0);
11670 %}
11671 
11672 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11673 %{
        // Signed high 64 bits of a 64x64 multiply (MulHiL). Uses the one-operand
        // imulq, which per the format text produces RDX:RAX = RAX * src; the
        // result is therefore pinned to RDX and RAX is both consumed and
        // overwritten (USE_KILL).
11674   match(Set dst (MulHiL src rax));
11675   effect(USE_KILL rax, KILL cr);
11676 
11677   ins_cost(300);
11678   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11679   ins_encode %{
11680     __ imulq($src$$Register);
11681   %}
11682   ins_pipe(ialu_reg_reg_alu0);
11683 %}
11684 
11685 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11686 %{
        // Unsigned high 64 bits of a 64x64 multiply (UMulHiL). Uses the
        // one-operand mulq, which per the format text produces
        // RDX:RAX = RAX * src; the result is pinned to RDX and RAX is both
        // consumed and overwritten (USE_KILL).
11687   match(Set dst (UMulHiL src rax));
11688   effect(USE_KILL rax, KILL cr);
11689 
11690   ins_cost(300);
11691   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11692   ins_encode %{
11693     __ mulq($src$$Register);
11694   %}
11695   ins_pipe(ialu_reg_reg_alu0);
11696 %}
11697 
11698 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11699                    rFlagsReg cr)
11700 %{
        // Signed int division: quotient in RAX, RDX clobbered. The divisor is
        // restricted to a register other than RAX/RDX.
        // Per the format text, the sequence special-cases a dividend of
        // 0x80000000 with a divisor of -1 and skips idivl in that case -
        // presumably to avoid the hardware divide-overflow fault; confirm.
        // The machine code comes from cdql_enc, defined outside this chunk.
11701   match(Set rax (DivI rax div));
11702   effect(KILL rdx, KILL cr);
11703 
11704   ins_cost(30*100+10*100); // XXX
11705   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11706             "jne,s   normal\n\t"
11707             "xorl    rdx, rdx\n\t"
11708             "cmpl    $div, -1\n\t"
11709             "je,s    done\n"
11710     "normal: cdql\n\t"
11711             "idivl   $div\n"
11712     "done:"        %}
11713   ins_encode(cdql_enc(div));
11714   ins_pipe(ialu_reg_reg_alu0);
11715 %}
11716 
11717 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11718                    rFlagsReg cr)
11719 %{
        // Signed long division: quotient in RAX, RDX clobbered. The divisor is
        // restricted to a register other than RAX/RDX.
        // Per the format text, the sequence special-cases a dividend of
        // 0x8000000000000000 with a divisor of -1 and skips idivq in that case -
        // presumably to avoid the hardware divide-overflow fault; confirm.
        // The machine code comes from cdqq_enc, defined outside this chunk.
11720   match(Set rax (DivL rax div));
11721   effect(KILL rdx, KILL cr);
11722 
11723   ins_cost(30*100+10*100); // XXX
11724   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11725             "cmpq    rax, rdx\n\t"
11726             "jne,s   normal\n\t"
11727             "xorl    rdx, rdx\n\t"
11728             "cmpq    $div, -1\n\t"
11729             "je,s    done\n"
11730     "normal: cdqq\n\t"
11731             "idivq   $div\n"
11732     "done:"        %}
11733   ins_encode(cdqq_enc(div));
11734   ins_pipe(ialu_reg_reg_alu0);
11735 %}
11736 
11737 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11738 %{
        // Unsigned int division: quotient in RAX, RDX clobbered. The actual
        // instruction sequence is produced by the udivI assembler helper, which
        // is not visible in this chunk.
11739   match(Set rax (UDivI rax div));
11740   effect(KILL rdx, KILL cr);
11741 
11742   ins_cost(300);
11743   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11744   ins_encode %{
11745     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11746   %}
11747   ins_pipe(ialu_reg_reg_alu0);
11748 %}
11749 
11750 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11751 %{
        // Unsigned long division: quotient in RAX, RDX clobbered. The actual
        // instruction sequence is produced by the udivL assembler helper, which
        // is not visible in this chunk.
11752   match(Set rax (UDivL rax div));
11753   effect(KILL rdx, KILL cr);
11754 
11755   ins_cost(300);
11756   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11757   ins_encode %{
11758      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11759   %}
11760   ins_pipe(ialu_reg_reg_alu0);
11761 %}
11762 
11763 // Integer DIVMOD with Register, both quotient and mod results
11764 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11765                              rFlagsReg cr)
11766 %{
        // Combined signed int divide+remainder (DivModI). The match rule has no
        // "Set": both RAX and RDX carry results, so neither is declared KILLed.
        // Uses the same cdql_enc sequence as the separate div/mod rules,
        // including the 0x80000000 / -1 special case shown in the format text.
11767   match(DivModI rax div);
11768   effect(KILL cr);
11769 
11770   ins_cost(30*100+10*100); // XXX
11771   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11772             "jne,s   normal\n\t"
11773             "xorl    rdx, rdx\n\t"
11774             "cmpl    $div, -1\n\t"
11775             "je,s    done\n"
11776     "normal: cdql\n\t"
11777             "idivl   $div\n"
11778     "done:"        %}
11779   ins_encode(cdql_enc(div));
11780   ins_pipe(pipe_slow);
11781 %}
11782 
11783 // Long DIVMOD with Register, both quotient and mod results
11784 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11785                              rFlagsReg cr)
11786 %{
        // Combined signed long divide+remainder (DivModL). The match rule has no
        // "Set": both RAX and RDX carry results, so neither is declared KILLed.
        // Uses the same cdqq_enc sequence as the separate div/mod rules,
        // including the min-long / -1 special case shown in the format text.
11787   match(DivModL rax div);
11788   effect(KILL cr);
11789 
11790   ins_cost(30*100+10*100); // XXX
11791   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11792             "cmpq    rax, rdx\n\t"
11793             "jne,s   normal\n\t"
11794             "xorl    rdx, rdx\n\t"
11795             "cmpq    $div, -1\n\t"
11796             "je,s    done\n"
11797     "normal: cdqq\n\t"
11798             "idivq   $div\n"
11799     "done:"        %}
11800   ins_encode(cdqq_enc(div));
11801   ins_pipe(pipe_slow);
11802 %}
11803 
11804 // Unsigned integer DIVMOD with Register, both quotient and mod results
11805 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11806                               no_rax_rdx_RegI div, rFlagsReg cr)
11807 %{
        // Combined unsigned int divide+remainder (UDivModI): both RAX and RDX
        // carry results. Needs one scratch register (TEMP tmp) outside RAX/RDX.
        // The sequence is produced by the udivmodI assembler helper, which is
        // not visible in this chunk.
11808   match(UDivModI rax div);
11809   effect(TEMP tmp, KILL cr);
11810 
11811   ins_cost(300);
11812   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11813             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11814           %}
11815   ins_encode %{
11816     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11817   %}
11818   ins_pipe(pipe_slow);
11819 %}
11820 
11821 // Unsigned long DIVMOD with Register, both quotient and mod results
11822 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11823                               no_rax_rdx_RegL div, rFlagsReg cr)
11824 %{
        // Combined unsigned long divide+remainder (UDivModL): both RAX and RDX
        // carry results. Needs one scratch register (TEMP tmp) outside RAX/RDX.
        // The sequence is produced by the udivmodL assembler helper, which is
        // not visible in this chunk.
11825   match(UDivModL rax div);
11826   effect(TEMP tmp, KILL cr);
11827 
11828   ins_cost(300);
11829   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11830             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11831           %}
11832   ins_encode %{
11833     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11834   %}
11835   ins_pipe(pipe_slow);
11836 %}
11837 
11838 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11839                    rFlagsReg cr)
11840 %{
        // Signed int remainder: result in RDX, RAX (the dividend) clobbered.
        // Shares the cdql_enc sequence with divI_rReg; per the format text, the
        // 0x80000000 % -1 case bypasses idivl with RDX already zeroed.
        // NOTE(review): costed at 300 while divI_rReg uses 30*100+10*100 for the
        // same encoding - both carry an "XXX" marker; confirm intended cost.
11841   match(Set rdx (ModI rax div));
11842   effect(KILL rax, KILL cr);
11843 
11844   ins_cost(300); // XXX
11845   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11846             "jne,s   normal\n\t"
11847             "xorl    rdx, rdx\n\t"
11848             "cmpl    $div, -1\n\t"
11849             "je,s    done\n"
11850     "normal: cdql\n\t"
11851             "idivl   $div\n"
11852     "done:"        %}
11853   ins_encode(cdql_enc(div));
11854   ins_pipe(ialu_reg_reg_alu0);
11855 %}
11856 
11857 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11858                    rFlagsReg cr)
11859 %{
        // Signed long remainder: result in RDX, RAX (the dividend) clobbered.
        // Shares the cdqq_enc sequence with divL_rReg; per the format text, the
        // min-long % -1 case bypasses idivq with RDX already zeroed.
        // NOTE(review): costed at 300 while divL_rReg uses 30*100+10*100 for the
        // same encoding - both carry an "XXX" marker; confirm intended cost.
11860   match(Set rdx (ModL rax div));
11861   effect(KILL rax, KILL cr);
11862 
11863   ins_cost(300); // XXX
11864   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11865             "cmpq    rax, rdx\n\t"
11866             "jne,s   normal\n\t"
11867             "xorl    rdx, rdx\n\t"
11868             "cmpq    $div, -1\n\t"
11869             "je,s    done\n"
11870     "normal: cdqq\n\t"
11871             "idivq   $div\n"
11872     "done:"        %}
11873   ins_encode(cdqq_enc(div));
11874   ins_pipe(ialu_reg_reg_alu0);
11875 %}
11876 
11877 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11878 %{
        // Unsigned int remainder: result in RDX, RAX (the dividend) clobbered.
        // The sequence is produced by the umodI assembler helper, which is not
        // visible in this chunk.
11879   match(Set rdx (UModI rax div));
11880   effect(KILL rax, KILL cr);
11881 
11882   ins_cost(300);
11883   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11884   ins_encode %{
11885     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11886   %}
11887   ins_pipe(ialu_reg_reg_alu0);
11888 %}
11889 
11890 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11891 %{
        // Unsigned long remainder: result in RDX, RAX (the dividend) clobbered.
        // The sequence is produced by the umodL assembler helper, which is not
        // visible in this chunk.
11892   match(Set rdx (UModL rax div));
11893   effect(KILL rax, KILL cr);
11894 
11895   ins_cost(300);
11896   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11897   ins_encode %{
11898     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11899   %}
11900   ins_pipe(ialu_reg_reg_alu0);
11901 %}
11902 
11903 // Integer Shift Instructions
11904 // Shift Left by one, two, three
11905 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11906 %{
        // Non-APX in-place int left shift by an immI2 constant: dst <<= shift.
        // The encoding is identical to salI_rReg_imm; only the operand class of
        // the shift count differs.
11907   predicate(!UseAPX);
11908   match(Set dst (LShiftI dst shift));
11909   effect(KILL cr);
11910 
11911   format %{ "sall    $dst, $shift" %}
11912   ins_encode %{
11913     __ sall($dst$$Register, $shift$$constant);
11914   %}
11915   ins_pipe(ialu_reg);
11916 %}
11917 
11918 // Shift Left by one, two, three
11919 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11920 %{
        // APX-only int left shift by an immI2 constant with a separate
        // destination: dst = src << shift. Operand 1 (src) is marked
        // NDD-demotable.
11921   predicate(UseAPX);
11922   match(Set dst (LShiftI src shift));
11923   effect(KILL cr);
11924   flag(PD::Flag_ndd_demotable_opr1);
11925 
11926   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11927   ins_encode %{
11928     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11929   %}
11930   ins_pipe(ialu_reg);
11931 %}
11932 
11933 // Shift Left by 8-bit immediate
11934 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11935 %{
        // Non-APX in-place int left shift by an immI8 constant: dst <<= shift.
11936   predicate(!UseAPX);
11937   match(Set dst (LShiftI dst shift));
11938   effect(KILL cr);
11939 
11940   format %{ "sall    $dst, $shift" %}
11941   ins_encode %{
11942     __ sall($dst$$Register, $shift$$constant);
11943   %}
11944   ins_pipe(ialu_reg);
11945 %}
11946 
11947 // Shift Left by 8-bit immediate
11948 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11949 %{
        // APX-only int left shift by an immI8 constant with a separate
        // destination: dst = src << shift. Operand 1 (src) is marked
        // NDD-demotable.
11950   predicate(UseAPX);
11951   match(Set dst (LShiftI src shift));
11952   effect(KILL cr);
11953   flag(PD::Flag_ndd_demotable_opr1);
11954 
11955   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11956   ins_encode %{
11957     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11958   %}
11959   ins_pipe(ialu_reg);
11960 %}
11961 
// Int shift left of a value loaded from memory by an 8-bit immediate count,
// NDD form: the load is folded into the shift and the result lands in a
// register. Only selected while UseAPX is on.
instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  from   = $src$$Address;
    __ esall(result, from, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11974 
// Int shift left of a memory location, in place, by an 8-bit immediate
// count (load / shift / store back to the same address). No predicate.
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ sall(target, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
11987 
// Int shift left, in place, by a variable count. The count operand is
// constrained to rcx_RegI; the emitted instruction names only the
// destination. Only selected when BMI2 is not supported.
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ sall(Rdst);
  %}
  ins_pipe(ialu_reg_reg);
%}
12001 
// Int shift left of a memory location, in place, by a variable count held
// in an rcx_RegI operand. Only selected when BMI2 is not supported.
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ sall(target);
  %}
  ins_pipe(ialu_mem_reg);
%}
12015 
// Int shift left by a variable count in any int register, three-operand
// shlx form. Only selected when BMI2 is supported; declares no flags effect.
instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI src shift));

  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rsrc   = $src$$Register;
    Register Rcount = $shift$$Register;
    __ shlxl(Rdst, Rsrc, Rcount);
  %}
  ins_pipe(ialu_reg_reg);
%}
12027 
// Int shift left of a value loaded from memory by a variable register
// count, shlx form with the load folded in. Only selected when BMI2 is
// supported; declares no flags effect.
instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rcount = $shift$$Register;
    __ shlxl(Rdst, $src$$Address, Rcount);
  %}
  ins_pipe(ialu_reg_mem);
%}
12039 
// Int arithmetic shift right, in place, by an 8-bit immediate count.
// Two-operand form; only selected while UseAPX is off.
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    int count = $shift$$constant;
    __ sarl(Rdst, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12053 
// Int arithmetic shift right by an 8-bit immediate count, NDD form
// (separate destination and source). Only selected while UseAPX is on.
instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ esarl(Rdst, Rsrc, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12068 
// Int arithmetic shift right of a value loaded from memory by an 8-bit
// immediate count, NDD form with a register result. Only selected while
// UseAPX is on.
instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  from   = $src$$Address;
    __ esarl(result, from, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12081 
// Int arithmetic shift right of a memory location, in place, by an 8-bit
// immediate count (load / shift / store to the same address). No predicate.
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ sarl(target, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12094 
// Int arithmetic shift right, in place, by a variable count held in an
// rcx_RegI operand; the emitted instruction names only the destination.
// Only selected when BMI2 is not supported.
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ sarl(Rdst);
  %}
  ins_pipe(ialu_reg_reg);
%}
12108 
// Int arithmetic shift right of a memory location, in place, by a variable
// count held in an rcx_RegI operand. Only selected when BMI2 is not supported.
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ sarl(target);
  %}
  ins_pipe(ialu_mem_reg);
%}
12122 
// Int arithmetic shift right by a variable count in any int register,
// three-operand sarx form. Only selected when BMI2 is supported; declares
// no flags effect.
instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI src shift));

  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rsrc   = $src$$Register;
    Register Rcount = $shift$$Register;
    __ sarxl(Rdst, Rsrc, Rcount);
  %}
  ins_pipe(ialu_reg_reg);
%}
12134 
// Int arithmetic shift right of a value loaded from memory by a variable
// register count, sarx form with the load folded in. Only selected when
// BMI2 is supported; declares no flags effect.
instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rcount = $shift$$Register;
    __ sarxl(Rdst, $src$$Address, Rcount);
  %}
  ins_pipe(ialu_reg_mem);
%}
12146 
// Int logical (unsigned) shift right, in place, by an 8-bit immediate count.
// Two-operand form; only selected while UseAPX is off.
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    int count = $shift$$constant;
    __ shrl(Rdst, count);
  %}
  ins_pipe(ialu_reg);
%}
12160 
// Int logical shift right by an 8-bit immediate count, NDD form (separate
// destination and source). Only selected while UseAPX is on.
instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ eshrl(Rdst, Rsrc, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12175 
// Int logical shift right of a value loaded from memory by an 8-bit
// immediate count, NDD form with a register result. Only selected while
// UseAPX is on.
instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  from   = $src$$Address;
    __ eshrl(result, from, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12188 
// Int logical shift right of a memory location, in place, by an 8-bit
// immediate count (load / shift / store to the same address). No predicate.
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ shrl(target, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12201 
// Int logical shift right, in place, by a variable count held in an
// rcx_RegI operand; the emitted instruction names only the destination.
// Only selected when BMI2 is not supported.
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ shrl(Rdst);
  %}
  ins_pipe(ialu_reg_reg);
%}
12215 
// Int logical shift right of a memory location, in place, by a variable
// count held in an rcx_RegI operand. Only selected when BMI2 is not supported.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ shrl(target);
  %}
  ins_pipe(ialu_mem_reg);
%}
12229 
// Int logical shift right by a variable count in any int register,
// three-operand shrx form. Only selected when BMI2 is supported; declares
// no flags effect.
instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rsrc   = $src$$Register;
    Register Rcount = $shift$$Register;
    __ shrxl(Rdst, Rsrc, Rcount);
  %}
  ins_pipe(ialu_reg_reg);
%}
12241 
// Int logical shift right of a value loaded from memory by a variable
// register count, shrx form with the load folded in. Only selected when
// BMI2 is supported; declares no flags effect.
instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rcount = $shift$$Register;
    __ shrxl(Rdst, $src$$Address, Rcount);
  %}
  ins_pipe(ialu_reg_mem);
%}
12253 
12254 // Long Shift Instructions
// Long shift left, in place, by an immI2 count (one, two or three).
// Two-operand form; only selected while UseAPX is off.
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    int count = $shift$$constant;
    __ salq(Rdst, count);
  %}
  ins_pipe(ialu_reg);
%}
12268 
// Long shift left by an immI2 count (one, two or three), NDD form
// (separate destination and source). Only selected while UseAPX is on.
instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ esalq(Rdst, Rsrc, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12283 
// Long shift left, in place, by an 8-bit immediate count.
// Two-operand form; only selected while UseAPX is off.
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    Register value = $dst$$Register;
    int amount = $shift$$constant;
    __ salq(value, amount);
  %}
  ins_pipe(ialu_reg);
%}
12297 
// Long shift left by an 8-bit immediate count, NDD form (separate
// destination and source). Only selected while UseAPX is on.
instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src$$Register;
    __ esalq(result, input, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12312 
// Long shift left of a value loaded from memory by an 8-bit immediate
// count, NDD form with a register result. Only selected while UseAPX is on.
instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  from   = $src$$Address;
    __ esalq(result, from, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12325 
// Long shift left of a memory location, in place, by an 8-bit immediate
// count (load / shift / store to the same address). No predicate.
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ salq(target, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12338 
// Long shift left, in place, by a variable count held in an rcx_RegI
// operand; the emitted instruction names only the destination.
// Only selected when BMI2 is not supported.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ salq(Rdst);
  %}
  ins_pipe(ialu_reg_reg);
%}
12352 
// Long shift left of a memory location, in place, by a variable count held
// in an rcx_RegI operand. Only selected when BMI2 is not supported.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ salq(target);
  %}
  ins_pipe(ialu_mem_reg);
%}
12366 
// Long shift left by a variable count in any int register, three-operand
// shlx form. Only selected when BMI2 is supported; declares no flags effect.
instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rsrc   = $src$$Register;
    Register Rcount = $shift$$Register;
    __ shlxq(Rdst, Rsrc, Rcount);
  %}
  ins_pipe(ialu_reg_reg);
%}
12378 
// Long shift left of a value loaded from memory by a variable register
// count, shlx form with the load folded in. Only selected when BMI2 is
// supported; declares no flags effect.
instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rcount = $shift$$Register;
    __ shlxq(Rdst, $src$$Address, Rcount);
  %}
  ins_pipe(ialu_reg_mem);
%}
12390 
// Long arithmetic shift right, in place, by an int immediate (immI).
// Only the low six bits of the immediate are passed to the assembler.
// Two-operand form; only selected while UseAPX is off.
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    // Reduce the count to bits 0..5 before encoding.
    unsigned char count = (unsigned char)($shift$$constant & 0x3F);
    __ sarq($dst$$Register, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12404 
// Long arithmetic shift right by an int immediate (immI), NDD form
// (separate destination and source). Only the low six bits of the immediate
// are passed to the assembler. Only selected while UseAPX is on.
instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    // Reduce the count to bits 0..5 before encoding.
    unsigned char count = (unsigned char)($shift$$constant & 0x3F);
    __ esarq($dst$$Register, $src$$Register, count, false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12419 
// Long arithmetic shift right of a value loaded from memory by an int
// immediate (immI), NDD form with a register result. Only the low six bits
// of the immediate are passed to the assembler. Only selected while UseAPX
// is on.
instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    // Reduce the count to bits 0..5 before encoding.
    unsigned char count = (unsigned char)($shift$$constant & 0x3F);
    __ esarq($dst$$Register, $src$$Address, count, false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12432 
// Long arithmetic shift right of a memory location, in place, by an int
// immediate (immI). Only the low six bits of the immediate are passed to
// the assembler. No predicate.
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    // Reduce the count to bits 0..5 before encoding.
    unsigned char count = (unsigned char)($shift$$constant & 0x3F);
    __ sarq($dst$$Address, count);
  %}
  ins_pipe(ialu_mem_imm);
%}
12445 
// Long arithmetic shift right, in place, by a variable count held in an
// rcx_RegI operand; the emitted instruction names only the destination.
// Only selected when BMI2 is not supported.
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ sarq(Rdst);
  %}
  ins_pipe(ialu_reg_reg);
%}
12459 
// Long arithmetic shift right of a memory location, in place, by a variable
// count held in an rcx_RegI operand. Only selected when BMI2 is not supported.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ sarq(target);
  %}
  ins_pipe(ialu_mem_reg);
%}
12473 
// Long arithmetic shift right by a variable count in any int register,
// three-operand sarx form. Only selected when BMI2 is supported; declares
// no flags effect.
instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL src shift));

  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rsrc   = $src$$Register;
    Register Rcount = $shift$$Register;
    __ sarxq(Rdst, Rsrc, Rcount);
  %}
  ins_pipe(ialu_reg_reg);
%}
12485 
// Long arithmetic shift right of a value loaded from memory by a variable
// register count, sarx form with the load folded in. Only selected when
// BMI2 is supported; declares no flags effect.
instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rcount = $shift$$Register;
    __ sarxq(Rdst, $src$$Address, Rcount);
  %}
  ins_pipe(ialu_reg_mem);
%}
12497 
// Long logical (unsigned) shift right, in place, by an 8-bit immediate count.
// Two-operand form; only selected while UseAPX is off.
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    int count = $shift$$constant;
    __ shrq(Rdst, count);
  %}
  ins_pipe(ialu_reg);
%}
12511 
// Long logical shift right by an 8-bit immediate count, NDD form (separate
// destination and source). Only selected while UseAPX is on.
instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ eshrq(Rdst, Rsrc, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12526 
// Long logical shift right of a value loaded from memory by an 8-bit
// immediate count, NDD form with a register result. Only selected while
// UseAPX is on.
instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  from   = $src$$Address;
    __ eshrq(result, from, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12539 
// Long logical shift right of a memory location, in place, by an 8-bit
// immediate count (load / shift / store to the same address). No predicate.
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ shrq(target, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12552 
// Long logical shift right, in place, by a variable count held in an
// rcx_RegI operand; the emitted instruction names only the destination.
// Only selected when BMI2 is not supported.
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ shrq(Rdst);
  %}
  ins_pipe(ialu_reg_reg);
%}
12566 
// Long logical shift right of a memory location, in place, by a variable
// count held in an rcx_RegI operand. Only selected when BMI2 is not supported.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    Address target = $dst$$Address;
    __ shrq(target);
  %}
  ins_pipe(ialu_mem_reg);
%}
12580 
// Long logical shift right by a variable count in any int register,
// three-operand shrx form. Only selected when BMI2 is supported; declares
// no flags effect.
instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL src shift));

  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rsrc   = $src$$Register;
    Register Rcount = $shift$$Register;
    __ shrxq(Rdst, Rsrc, Rcount);
  %}
  ins_pipe(ialu_reg_reg);
%}
12592 
// Long logical shift right of a value loaded from memory by a variable
// register count, shrx form with the load folded in. Only selected when
// BMI2 is supported; declares no flags effect.
instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst   = $dst$$Register;
    Register Rcount = $shift$$Register;
    __ shrxq(Rdst, $src$$Address, Rcount);
  %}
  ins_pipe(ialu_reg_mem);
%}
12604 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode; the whole
// shift pair is replaced by a single movsbl.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl  $dst, $src\t# i2b" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movsbl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
12617 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode; the whole
// shift pair is replaced by a single movswl.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl  $dst, $src\t# i2s" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movswl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
12630 
12631 // ROL/ROR instructions
12632 
// Int rotate left, in place, by an 8-bit immediate count.
// Only selected when BMI2 is not supported and the node's type is T_INT.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    int count = $shift$$constant;
    __ roll(Rdst, count);
  %}
  ins_pipe(ialu_reg);
%}
12645 
// Int rotate left by an 8-bit immediate count, three-operand form.
// Emitted as rorxl with the complementary count 32 - (count & 31).
// Only selected when UseAPX is off, BMI2 is supported and the node's type
// is T_INT; declares no flags effect.
instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    // A left rotate is expressed as a right rotate by the remaining bits.
    int ror_count = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Register, ror_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12657 
// Int rotate left of a value loaded from memory by an 8-bit immediate count.
// Emitted as rorxl on the memory operand with the complementary count
// 32 - (count & 31). Only selected when BMI2 is supported and the node's
// type is T_INT; declares no flags effect.
instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft (LoadI src) shift));
  ins_cost(175);
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    // A left rotate is expressed as a right rotate by the remaining bits.
    int ror_count = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Address, ror_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12670 
// Int rotate left, in place, by a variable count held in an rcx_RegI
// operand; the emitted instruction names only the destination.
// Only selected when UseAPX is off and the node's type is T_INT.
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ roll(Rdst);
  %}
  ins_pipe(ialu_reg_reg);
%}
12683 
// Int rotate left by a variable count held in an rcx_RegI operand, NDD form
// (separate destination and source). Only selected when UseAPX is on and
// the node's type is T_INT.
instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ eroll(Rdst, Rsrc, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12698 
// Int rotate right, in place, by an 8-bit immediate count.
// Only selected when BMI2 is not supported and the node's type is T_INT.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    int count = $shift$$constant;
    __ rorl(Rdst, count);
  %}
  ins_pipe(ialu_reg);
%}
12711 
// Int rotate right by an 8-bit immediate count, three-operand rorx form.
// Only selected when UseAPX is off, BMI2 is supported and the node's type
// is T_INT; declares no flags effect.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ rorxl(Rdst, Rsrc, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12723 
// Int rotate right of a value loaded from memory by an 8-bit immediate
// count, rorx form with the load folded in. Only selected when BMI2 is
// supported and the node's type is T_INT; declares no flags effect.
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight (LoadI src) shift));
  ins_cost(175);
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  from   = $src$$Address;
    __ rorxl(result, from, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12735 
// Int rotate right, in place, by a variable count held in an rcx_RegI
// operand; the emitted instruction names only the destination.
// Only selected when UseAPX is off and the node's type is T_INT.
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ rorl(Rdst);
  %}
  ins_pipe(ialu_reg_reg);
%}
12748 
// Int rotate right by a variable count held in an rcx_RegI operand, NDD
// form (separate destination and source). Only selected when UseAPX is on
// and the node's type is T_INT.
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ erorl(Rdst, Rsrc, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12763 
// Long rotate left, in place, by an 8-bit immediate count.
// Only selected when BMI2 is not supported and the node's type is T_LONG.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    int count = $shift$$constant;
    __ rolq(Rdst, count);
  %}
  ins_pipe(ialu_reg);
%}
12776 
// Long rotate left by an 8-bit immediate count, three-operand form.
// Emitted as rorxq with the complementary count 64 - (count & 63).
// Only selected when UseAPX is off, BMI2 is supported and the node's type
// is T_LONG; declares no flags effect.
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    // A left rotate is expressed as a right rotate by the remaining bits.
    int ror_count = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Register, ror_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12788 
// Long rotate left of a value loaded from memory by an 8-bit immediate
// count. Emitted as rorxq on the memory operand with the complementary
// count 64 - (count & 63). Only selected when BMI2 is supported and the
// node's type is T_LONG; declares no flags effect.
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft (LoadL src) shift));
  ins_cost(175);
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    // A left rotate is expressed as a right rotate by the remaining bits.
    int ror_count = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Address, ror_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12801 
// Long rotate left, in place, by a variable count held in an rcx_RegI
// operand; the emitted instruction names only the destination.
// Only selected when UseAPX is off and the node's type is T_LONG.
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);

  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ rolq(Rdst);
  %}
  ins_pipe(ialu_reg_reg);
%}
12815 
// Long rotate left by a variable count held in an rcx_RegI operand, NDD
// form (separate destination and source). Only selected when UseAPX is on
// and the node's type is T_LONG.
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ erolq(Rdst, Rsrc, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12830 
// Long rotate right, in place, by an 8-bit immediate count.
// Only selected when BMI2 is not supported and the node's type is T_LONG.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    int count = $shift$$constant;
    __ rorq(Rdst, count);
  %}
  ins_pipe(ialu_reg);
%}
12843 
// Long rotate right by an 8-bit immediate count, three-operand rorx form.
// Only selected when BMI2 is supported and the node's type is T_LONG;
// declares no flags effect.
// NOTE(review): unlike rolI_immI8, rorI_immI8 and rolL_immI8, this predicate
// does not test !UseAPX — confirm that is intentional.
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ rorxq(Rdst, Rsrc, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12855 
// Long rotate right of a value loaded from memory by an 8-bit immediate
// count, rorx form with the load folded in. Only selected when BMI2 is
// supported and the node's type is T_LONG; declares no flags effect.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight (LoadL src) shift));
  ins_cost(175);
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    Register result = $dst$$Register;
    Address  from   = $src$$Address;
    __ rorxq(result, from, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12867 
// Long rotate right, in place, by a variable count held in an rcx_RegI
// operand; the emitted instruction names only the destination.
// Only selected when UseAPX is off and the node's type is T_LONG.
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ rorq(Rdst);
  %}
  ins_pipe(ialu_reg_reg);
%}
12880 
// Long rotate right by a variable count held in an rcx_RegI operand, NDD
// form (separate destination and source). Only selected when UseAPX is on
// and the node's type is T_LONG.
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ erorq(Rdst, Rsrc, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12895 
12896 //----------------------------- CompressBits/ExpandBits ------------------------
12897 
// Long CompressBits with the mask in a register, emitted as pextq.
// Only selected when the node's type is a long.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    Register Rdst  = $dst$$Register;
    Register Rsrc  = $src$$Register;
    Register Rmask = $mask$$Register;
    __ pextq(Rdst, Rsrc, Rmask);
  %}
  ins_pipe( pipe_slow );
%}
12907 
// Long ExpandBits with the mask in a register, emitted as pdepq.
// Only selected when the node's type is a long.
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    Register Rdst  = $dst$$Register;
    Register Rsrc  = $src$$Register;
    Register Rmask = $mask$$Register;
    __ pdepq(Rdst, Rsrc, Rmask);
  %}
  ins_pipe( pipe_slow );
%}
12917 
// Long CompressBits with the mask loaded from memory, emitted as pextq
// with a memory operand. Only selected when the node's type is a long.
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src (LoadL mask)));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ pextq(Rdst, Rsrc, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
12927 
// Long ExpandBits with the mask loaded from memory, emitted as pdepq
// with a memory operand. Only selected when the node's type is a long.
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src (LoadL mask)));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ pdepq(Rdst, Rsrc, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
12937 
12938 
12939 // Logical Instructions
12940 
12941 // Integer Logical Instructions
12942 
12943 // And Instructions
12944 // And Register with Register
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  // Two-operand 32-bit AND (dst = dst & src); only eligible when UseAPX is off.
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12958 
12959 // And Register with Register using New Data Destination (NDD)
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  // APX NDD variant of 32-bit AND: dst = src1 & src2.
  // Both register sources are declared as NDD-demotable operands.
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1,
       PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12974 
12975 // And Register with Immediate 255
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask) %{
  // AND of an int with the constant 255 is emitted as a byte zero-extension;
  // no flags operand is declared for this rule.
  match(Set dst (AndI src mask));

  format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12986 
12987 // And Register with Immediate 255 and promote to long
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask) %{
  // Folds ConvI2L over (src & 255) into one byte zero-extension that
  // writes a long destination register.
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12998 
12999 // And Register with Immediate 65535
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask) %{
  // AND of an int with the constant 65535 is emitted as a 16-bit
  // zero-extension; no flags operand is declared for this rule.
  match(Set dst (AndI src mask));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13010 
13011 // And Register with Immediate 65535 and promote to long
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask) %{
  // Folds ConvI2L over (src & 65535) into one 16-bit zero-extension that
  // writes a long destination register.
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13022 
13023 // Can skip int2long conversions after AND with small bitmask
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr) %{
  // Folds ConvI2L over (src & mask) for an immI_Pow2M1 mask into bzhiq.
  // Requires BMI2; tmp is a scratch register for the bit count and cr is killed.
  match(Set dst (ConvI2L (AndI src mask)));
  predicate(VM_Version::supports_bmi2());
  effect(TEMP tmp, KILL cr);
  ins_cost(125);

  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
  ins_encode %{
    // Load log2(mask + 1) into the scratch register, then use it as the
    // index operand of bzhiq.
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13037 
13038 // And Register with Immediate
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  // Two-operand 32-bit AND with an immediate (dst = dst & src);
  // only eligible when UseAPX is off.
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13052 
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  // APX NDD variant of 32-bit AND with an immediate: dst = src1 & src2.
  // Only the register source (operand 1) is declared NDD-demotable.
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1);
  effect(KILL cr);

  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13066 
instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  // APX NDD variant: dst = (int loaded from src1) & src2, with the LoadI
  // folded into the memory operand. No demotable operand is declared.
  match(Set dst (AndI (LoadI src1) src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13080 
13081 // And Register with Memory
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  // Two-operand 32-bit AND with a folded LoadI (dst = dst & [src]);
  // only eligible when UseAPX is off.
  match(Set dst (AndI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13096 
instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  // APX NDD variant: dst = src1 & (int loaded from src2), with the LoadI
  // folded into the memory operand.
  match(Set dst (AndI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1,
       PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13111 
13112 // And Memory with Register
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  // Byte read-modify-write: LoadB, AND with src and StoreB on the same
  // address collapse into one andb on memory.
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andb    $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13126 
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  // Int read-modify-write: LoadI, AND with src and StoreI on the same
  // address collapse into one andl on memory.
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13140 
13141 // And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  // Int read-modify-write with an immediate: LoadI, AND with the constant
  // and StoreI on the same address collapse into one andl on memory.
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13155 
13156 // BMI1 instructions
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr)
%{
  // (src1 ^ -1) & [src2], i.e. AND-NOT with a folded LoadI, emitted as andnl.
  // Guarded by both the BMI1 and the AVX feature checks.
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andnl  $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13171 
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr)
%{
  // (src1 ^ -1) & src2, i.e. register-only AND-NOT, emitted as andnl.
  // Guarded by both the BMI1 and the AVX feature checks.
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) src2));
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andnl  $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13185 
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr)
%{
  // (0 - src) & src on a register, emitted as blsil.
  // Guarded by both the BMI1 and the AVX feature checks.
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero src) src));
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_clears_overflow_flag);
  effect(KILL cr);

  format %{ "blsil  $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13199 
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr)
%{
  // (0 - [src]) & [src] with both LoadI uses folded into one memory operand,
  // emitted as blsil. Guarded by both the BMI1 and the AVX feature checks.
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_clears_overflow_flag);
  effect(KILL cr);

  format %{ "blsil  $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13214 
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) %{
  // ([src] + -1) ^ [src] with both LoadI uses folded into one memory operand,
  // emitted as blsmskl. Guarded by both the BMI1 and the AVX feature checks.
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_clears_zero_flag,
       PD::Flag_clears_overflow_flag);
  effect(KILL cr);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13230 
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) %{
  // (src + -1) ^ src on a register, emitted as blsmskl.
  // Guarded by both the BMI1 and the AVX feature checks.
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI src minus_1) src));
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_clears_zero_flag,
       PD::Flag_clears_overflow_flag);
  effect(KILL cr);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13246 
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  // (src + -1) & src on a register, emitted as blsrl.
  // Guarded by both the BMI1 and the AVX feature checks.
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // Register-only form: use the same pipeline class as the other
  // register-only BMI1 rules (blsil, blsmskl, andnl), not the memory class.
  ins_pipe(ialu_reg);
%}
13262 
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  // ([src] + -1) & [src] with both LoadI uses folded into one memory operand,
  // emitted as blsrl. Guarded by both the BMI1 and the AVX feature checks.
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // Memory-source form: use the register/memory pipeline class, matching
  // the other memory-source BMI1 rules (blsil, blsmskl, andnl).
  ins_pipe(ialu_reg_mem);
%}
13279 
13280 // Or Instructions
13281 // Or Register with Register
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  // Two-operand 32-bit OR (dst = dst | src); only eligible when UseAPX is off.
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13295 
13296 // Or Register with Register using New Data Destination (NDD)
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  // APX NDD variant of 32-bit OR: dst = src1 | src2.
  // Both register sources are declared as NDD-demotable operands.
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1,
       PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13310 
13311 // Or Register with Immediate
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  // Two-operand 32-bit OR with an immediate (dst = dst | src);
  // only eligible when UseAPX is off.
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13325 
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  // APX NDD variant of 32-bit OR with an immediate: dst = src1 | src2.
  // Only the register source (operand 1) is declared NDD-demotable.
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1);
  effect(KILL cr);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13339 
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  // APX NDD variant of 32-bit OR where the immediate is the FIRST ideal
  // input: dst = src1 | src2, with src1 a constant and src2 a register.
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  // The only register source is operand 2 (src2); operand 1 is an immediate
  // and has no register, so the demotable operand is opr2.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    // Operands are swapped relative to the match rule: the register goes
    // first and the constant second.
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13353 
instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  // APX NDD variant: dst = (int loaded from src1) | src2, with the LoadI
  // folded into the memory operand. No demotable operand is declared.
  match(Set dst (OrI (LoadI src1) src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13367 
13368 // Or Register with Memory
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  // Two-operand 32-bit OR with a folded LoadI (dst = dst | [src]);
  // only eligible when UseAPX is off.
  match(Set dst (OrI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13383 
instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  // APX NDD variant: dst = src1 | (int loaded from src2), with the LoadI
  // folded into the memory operand.
  match(Set dst (OrI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1,
       PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13398 
13399 // Or Memory with Register
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  // Byte read-modify-write: LoadB, OR with src and StoreB on the same
  // address collapse into one orb on memory.
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13413 
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  // Int read-modify-write: LoadI, OR with src and StoreI on the same
  // address collapse into one orl on memory.
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13427 
13428 // Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  // Int read-modify-write with an immediate: LoadI, OR with the constant
  // and StoreI on the same address collapse into one orl on memory.
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13442 
13443 // Xor Instructions
13444 // Xor Register with Register
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  // Two-operand 32-bit XOR (dst = dst ^ src); only eligible when UseAPX is off.
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13458 
13459 // Xor Register with Register using New Data Destination (NDD)
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  // APX NDD variant of 32-bit XOR: dst = src1 ^ src2.
  // Both register sources are declared as NDD-demotable operands.
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1,
       PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13473 
13474 // Xor Register with Immediate -1
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  // XOR of an int with the constant -1 is emitted as an in-place notl.
  // Only eligible when UseAPX is off; no flags operand is declared.
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);

  format %{ "notl    $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13486 
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm) %{
  // APX NDD variant of int NOT: src ^ -1 is emitted as enotl into dst.
  // The register source is declared NDD-demotable; no flags operand is declared.
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl    $dst, $src" %}
  ins_encode %{
    __ enotl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13499 
13500 // Xor Register with Immediate
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  // Two-operand 32-bit XOR with an immediate (dst = dst ^ src), non-APX only.
  // The predicate rejects the constant -1 so that xorI_rReg_im1 is picked for
  // that case independently of instruction cost.
  match(Set dst (XorI dst src));
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13515 
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  // APX NDD variant of 32-bit XOR with an immediate: dst = src1 ^ src2.
  // The predicate rejects the constant -1 so that xorI_rReg_im1_ndd is picked
  // for that case independently of instruction cost.
  match(Set dst (XorI src1 src2));
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1);
  effect(KILL cr);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13530 
// Xor Memory with Immediate into a Register using New Data Destination (NDD)
instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  // APX NDD variant: dst = (int loaded from src1) ^ src2, with the LoadI
  // folded into the memory operand. No demotable operand is declared.
  match(Set dst (XorI (LoadI src1) src2));
  predicate(UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13546 
13547 // Xor Register with Memory
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  // Two-operand 32-bit XOR with a folded LoadI (dst = dst ^ [src]);
  // only eligible when UseAPX is off.
  match(Set dst (XorI dst (LoadI src)));
  predicate(!UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13562 
instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  // APX NDD variant: dst = src1 ^ (int loaded from src2), with the LoadI
  // folded into the memory operand.
  match(Set dst (XorI src1 (LoadI src2)));
  predicate(UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1,
       PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13577 
13578 // Xor Memory with Register
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  // Byte read-modify-write: LoadB, XOR with src and StoreB on the same
  // address collapse into one xorb on memory.
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13592 
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  // Int read-modify-write: LoadI, XOR with src and StoreI on the same
  // address collapse into one xorl on memory.
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13606 
13607 // Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  // Int read-modify-write with an immediate: LoadI, XOR with the constant
  // and StoreI on the same address collapse into one xorl on memory.
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13621 
13622 
13623 // Long Logical Instructions
13624 
13625 // And Instructions
13626 // And Register with Register
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  // Two-operand 64-bit AND (dst = dst & src); only eligible when UseAPX is off.
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13640 
13641 // And Register with Register using New Data Destination (NDD)
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  // APX NDD variant of 64-bit AND: dst = src1 & src2.
  // Both register sources are declared as NDD-demotable operands.
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1,
       PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13656 
13657 // And Register with Immediate 255
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask) %{
  // AND of a long with the constant 255 is emitted as a byte zero-extension;
  // no flags operand is declared for this rule.
  match(Set dst (AndL src mask));

  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // The 32-bit movzbl suffices: it clears the upper 32 bits, so no REX.W
    // form is needed.
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13669 
13670 // And Register with Immediate 65535
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask) %{
  // AND of a long with the constant 65535 is emitted as a 16-bit
  // zero-extension; no flags operand is declared for this rule.
  match(Set dst (AndL src mask));

  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // The 32-bit movzwl suffices: it clears the upper 32 bits, so no REX.W
    // form is needed.
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13682 
13683 // And Register with Immediate
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  // Two-operand 64-bit AND with an immL32 constant (dst = dst & src);
  // only eligible when UseAPX is off.
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13697 
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  // APX NDD variant of 64-bit AND with an immL32 constant: dst = src1 & src2.
  // Only the register source (operand 1) is declared NDD-demotable.
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1);
  effect(KILL cr);

  format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13711 
instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  // APX NDD variant: dst = (long loaded from src1) & src2, with the LoadL
  // folded into the memory operand. No demotable operand is declared.
  match(Set dst (AndL (LoadL src1) src2));
  predicate(UseAPX);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13725 
13726 // And Register with Memory
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  // Two-operand 64-bit AND with a folded LoadL (dst = dst & [src]);
  // only eligible when UseAPX is off.
  match(Set dst (AndL dst (LoadL src)));
  predicate(!UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13741 
instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  // APX NDD variant: dst = src1 & (long loaded from src2), with the LoadL
  // folded into the memory operand.
  match(Set dst (AndL src1 (LoadL src2)));
  predicate(UseAPX);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag,
       PD::Flag_ndd_demotable_opr1,
       PD::Flag_ndd_demotable_opr2);
  effect(KILL cr);

  format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13756 
13757 // And Memory with Register
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  // Long read-modify-write: LoadL, AND with src and StoreL on the same
  // address collapse into one andq on memory.
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13771 
13772 // And Memory with Immediate
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  // Long read-modify-write with an immL32 constant: LoadL, AND with the
  // constant and StoreL on the same address collapse into one andq on memory.
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  ins_cost(125);
  flag(PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_clears_overflow_flag,
       PD::Flag_clears_carry_flag);
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13786 
// Clear one bit of a long in memory with btrq.
// Matches a load/and/store on the same address whose mask operand is an
// immL_NotPow2 constant; the bit index is log2 of the complemented mask.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));

  // con should be a pure 64-bit immediate given that not(con) is a power of 2
  // because AND/OR works well enough for 8/32-bit values.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);

  effect(KILL cr);
  ins_cost(125);

  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    // Position of the single zero bit in the mask.
    const int bit_index = log2i_exact((julong)~$con$$constant);
    __ btrq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
13803 
13804 // BMI1 instructions
// And-not of a register with a long loaded from memory.
// Matches (src1 ^ -1) & load(src2) and emits one andnq; requires both the
// BMI1 and AVX feature checks in the predicate to pass. Flags are killed.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andnq  $dst, $src1, $src2" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register inverted = $src1$$Register;
    __ andnq(result, inverted, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13819 
// And-not of two registers.
// Matches (src1 ^ -1) & src2 and emits one andnq; requires both the BMI1 and
// AVX feature checks in the predicate to pass. Flags are killed.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // All operands are registers, so use the register-register pipeline class
  // rather than the register-memory one.
  ins_pipe(ialu_reg_reg);
%}
13833 
// Isolate the lowest set bit of a register.
// Matches (0 - src) & src and emits one blsiq; requires both the BMI1 and
// AVX feature checks in the predicate to pass. Flags are killed.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq  $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ blsiq(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13847 
// Isolate the lowest set bit of a long loaded from memory.
// Matches (0 - load(src)) & load(src) and emits one blsiq with a memory
// operand; requires the BMI1 and AVX feature checks. Flags are killed.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsiq  $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ blsiq(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13862 
// Mask up to (and including) the lowest set bit of a long loaded from memory.
// Matches (load(src) + -1) ^ load(src) and emits one blsmskq with a memory
// operand; requires the BMI1 and AVX feature checks. Flags are killed.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ blsmskq(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13878 
// Mask up to (and including) the lowest set bit of a register.
// Matches (src + -1) ^ src and emits one blsmskq; requires the BMI1 and AVX
// feature checks. Flags are killed.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ blsmskq(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13894 
// Reset the lowest set bit of a register.
// Matches (src + -1) & src and emits one blsrq; requires the BMI1 and AVX
// feature checks. Flags are killed.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (AddL src minus_1) src) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq  $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ blsrq(result, value);
  %}
  ins_pipe(ialu_reg);
%}
13910 
// Reset the lowest set bit of a long loaded from memory.
// Matches (load(src) + -1) & load(src) and emits one blsrq with a memory
// operand; requires the BMI1 and AVX feature checks. Flags are killed.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // The source is a memory operand, so use the register-memory pipeline
  // class, in line with blsiL_rReg_mem and blsmskL_rReg_mem.
  ins_pipe(ialu_reg_mem);
%}
13927 
13928 // Or Instructions
// Or Register with Register (two-operand form).
// Used only when UseAPX is off: dst = dst | src. Flags are killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    const Register other = $src$$Register;
    __ orq(accum, other);
  %}
  ins_pipe(ialu_reg_reg);
%}
13943 
// Or Register with Register using New Data Destination (NDD).
// Used only when UseAPX is on: dst = src1 | src2 with a separate
// destination register. Flags are killed.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    const Register rhs    = $src2$$Register;
    __ eorq(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13959 
// Or a long register with a pointer reinterpreted as a long (CastP2X).
// Use any_RegP to match R15 (TLS register) without spilling.
// Used only when UseAPX is off. Flags are killed.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    const Register accum   = $dst$$Register;
    const Register pointer = $src$$Register;
    __ orq(accum, pointer);
  %}
  ins_pipe(ialu_reg_reg);
%}
13973 
// Or a long register with a pointer reinterpreted as a long (CastP2X),
// NDD form. Used only when UseAPX is on: dst = src1 | (long)src2.
// src2 uses any_RegP so that R15 (TLS register) can be matched without
// spilling. src1 is the long-typed input of the OrL node and is therefore
// declared rRegL, mirroring the non-NDD rule orL_rReg_castP2X where that
// input is the rRegL dst. Flags are killed.
instruct orL_rReg_castP2X_ndd(rRegL dst, rRegL src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13986 
// Or Register with Immediate (two-operand form).
// Used only when UseAPX is off: dst = dst | immL32 constant. Flags are killed.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ orq(accum, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
14001 
// Or Register with Immediate using New Data Destination (NDD).
// Used only when UseAPX is on: dst = src1 | immL32 constant, with the
// constant as the second match operand. Flags are killed.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    __ eorq(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14015 
// Or Immediate with Register using New Data Destination (NDD).
// Mirror of the register/immediate rule for the case where the constant is
// the first match operand; the emitted instruction still takes the register
// first and the constant second. Used only when UseAPX is on.
// NOTE(review): the register input here is src2, yet the flag list names
// Flag_ndd_demotable_opr1 -- confirm this is intended.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register reg_side = $src2$$Register;
    __ eorq(result, reg_side, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14029 
// Or Memory with Immediate into a register, New Data Destination (NDD) form.
// Used only when UseAPX is on: dst = load(src1) | immL32 constant; memory is
// only read, the result lands in dst. Flags are killed.
instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (OrL (LoadL src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ eorq(result, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14044 
// Or Register with Memory (two-operand form).
// Used only when UseAPX is off: dst = dst | load(src). Flags are killed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ orq(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14060 
// Or Register with Memory using New Data Destination (NDD).
// Used only when UseAPX is on: dst = src1 | load(src2). Flags are killed.
instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LoadL src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  ins_cost(150);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    __ eorq(result, lhs, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14075 
// Or Memory with Register.
// Read-modify-write form: the long at dst is loaded, ORed with src and
// stored back to the same address by a single orq. Flags are killed.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    const Register bits = $src$$Register;
    __ orq($dst$$Address, bits);
  %}
  ins_pipe(ialu_mem_reg);
%}
14090 
// Or Memory with Immediate.
// Read-modify-write form with an immL32 constant: the long at dst is ORed
// in place with the constant. Flags are killed.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  ins_cost(125);
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{ __ orq($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
14105 
// Set one bit of a long in memory with btsq.
// Matches a load/or/store on the same address whose constant operand is an
// immL_Pow2; the bit index is log2 of that constant.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) con)));

  // con should be a pure 64-bit power of 2 immediate
  // because AND/OR works well enough for 8/32-bit values.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);

  effect(KILL cr);
  ins_cost(125);

  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    // Position of the single set bit in the constant.
    const int bit_index = log2i_exact((julong)$con$$constant);
    __ btsq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
14122 
14123 // Xor Instructions
// Xor Register with Register (two-operand form).
// Used only when UseAPX is off: dst = dst ^ src. Flags are killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    const Register other = $src$$Register;
    __ xorq(accum, other);
  %}
  ins_pipe(ialu_reg_reg);
%}
14138 
// Xor Register with Register using New Data Destination (NDD).
// Used only when UseAPX is on: dst = src1 ^ src2 with a separate
// destination register. Flags are killed.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (XorL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    const Register rhs    = $src2$$Register;
    __ exorq(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
14153 
// Xor Register with Immediate -1.
// dst ^ -1 is emitted as a single notq on dst. Used only when UseAPX is off.
// No flags operand is declared for this rule.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  predicate(!UseAPX);

  format %{ "notq   $dst" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ notq(target);
  %}
  ins_pipe(ialu_reg);
%}
14166 
// Xor Register with Immediate -1, New Data Destination (NDD) form.
// src ^ -1 is emitted as enotq with a separate destination register.
// Used only when UseAPX is on. No flags operand is declared for this rule.
instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm) %{
  match(Set dst (XorL src imm));
  predicate(UseAPX);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq   $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ enotq(result, value);
  %}
  ins_pipe(ialu_reg);
%}
14179 
// Xor Register with Immediate (two-operand form).
// Used only when UseAPX is off and the constant is not -1:
// dst = dst ^ immL32 constant. Flags are killed.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  // Strict predicate check to make selection of xorL_rReg_im1 cost agnostic if immL32 src is -1.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ xorq(accum, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
14195 
// Xor Register with Immediate, New Data Destination (NDD) form (the rule
// name carries no _ndd suffix, but it is guarded by UseAPX and emits exorq).
// Used when the constant is not -1: dst = src1 ^ immL32 constant.
// Flags are killed.
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (XorL src1 src2));
  // Strict predicate check to make selection of xorL_rReg_im1_ndd cost agnostic if immL32 src2 is -1.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    __ exorq(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14210 
// Xor Memory with Immediate into a register, New Data Destination (NDD)
// form (guarded by UseAPX, emits exorq).
// dst = load(src1) ^ immL32 constant; memory is only read. Flags are killed.
instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (XorL (LoadL src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ exorq(result, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14226 
// Xor Register with Memory (two-operand form).
// Used only when UseAPX is off: dst = dst ^ load(src). Flags are killed.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ xorq(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14242 
// Xor Register with Memory using New Data Destination (NDD).
// Used only when UseAPX is on: dst = src1 ^ load(src2). Flags are killed.
instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LoadL src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  ins_cost(150);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    __ exorq(result, lhs, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14257 
// Xor Memory with Register.
// Read-modify-write form: the long at dst is loaded, XORed with src and
// stored back to the same address by a single xorq. Flags are killed.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    const Register bits = $src$$Register;
    __ xorq($dst$$Address, bits);
  %}
  ins_pipe(ialu_mem_reg);
%}
14272 
// Xor Memory with Immediate.
// Read-modify-write form with an immL32 constant: the long at dst is XORed
// in place with the constant. Flags are killed.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  ins_cost(125);
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{ __ xorq($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
14287 
// Materialize the CmpLTMask of two ints into dst.
// Emits a compare of p with q, a setcc on the "less" condition into dst,
// and a negate of dst. Flags are killed.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // Each listing line ends with "\n\t" so that the following instruction
  // starts on its own line in the printed output.
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setcc   $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl    $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14304 
// CmpLTMask against the constant zero.
// Emitted as an arithmetic right shift of dst by 31, done in place.
// Flags are killed.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  ins_cost(100);
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ sarl(value, 31);
  %}
  ins_pipe(ialu_reg);
%}
14317 
/* Better to save a register than avoid a branch */
// Conditional add: p = (p - q) + ((p < q) ? y : 0).
// Implemented as a subtract followed by a short branch that skips the add
// when the "greaterEqual" condition holds. Flags are killed.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  ins_cost(300);
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);

  format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
            "jge     done\n\t"
            "addl    $p,$y\n"
            "done:   " %}
  ins_encode %{
    Label skip_add;
    __ subl($p$$Register, $q$$Register);
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl($p$$Register, $y$$Register);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
14340 
/* Better to save a register than avoid a branch */
// Conditional clear: y = (p < q) ? y : 0.
// Implemented as a compare followed by a short branch that skips zeroing y
// when the "less" condition holds. Flags are killed.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  ins_cost(300);
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
            "jlt     done\n\t"
            "xorl    $y, $y\n"
            "done:   " %}
  ins_encode %{
    Label keep_y;
    __ cmpl($p$$Register, $q$$Register);
    __ jccb(Assembler::less, keep_y);
    __ xorl($y$$Register, $y$$Register);
    __ bind(keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
14365 
14366 
14367 //---------- FP Instructions------------------------------------------------
14368 
// Float compare of two registers into rFlagsRegU.
// Really expensive (cost 500), avoid: ucomiss is followed by the sequence
// produced by emit_cmpfp_fixup; the format text lists that sequence.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  ins_cost(500);
  match(Set cr (CmpF src1 src2));

  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14387 
// Float compare of two registers into rFlagsRegUCF.
// A bare ucomiss with no fixup sequence (cost 100).
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  ins_cost(100);
  match(Set cr (CmpF src1 src2));

  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
14398 
// Float compare of two registers into rFlagsRegUCFE.
// Emits a single evucomxss (cost 100).
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2)
%{
  ins_cost(100);
  match(Set cr (CmpF src1 src2));

  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ evucomxss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
14409 
// Float compare of a register with a float loaded from memory, result in
// rFlagsRegUCF. A bare ucomiss with a memory operand (cost 100).
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  ins_cost(100);
  match(Set cr (CmpF src1 (LoadF src2)));

  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    __ ucomiss(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14420 
// Float compare of a register with a float loaded from memory, result in
// rFlagsRegUCFE. Emits a single evucomxss with a memory operand (cost 100).
instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2)
%{
  ins_cost(100);
  match(Set cr (CmpF src1 (LoadF src2)));

  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    __ evucomxss(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14431 
// Float compare of a register with a float constant, result in rFlagsRegUCF.
// The constant is addressed through the constant table (cost 100).
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  ins_cost(100);
  match(Set cr (CmpF src con));

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14442 
// Float compare of a register with a float constant, result in rFlagsRegUCFE.
// Emits evucomxss against the constant-table address of con (cost 100).
instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con)
%{
  ins_cost(100);
  match(Set cr (CmpF src con));

  format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister lhs = $src$$XMMRegister;
    __ evucomxss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14453 
// Double compare of two registers into rFlagsRegU.
// Really expensive (cost 500), avoid: ucomisd is followed by the sequence
// produced by emit_cmpfp_fixup; the format text lists that sequence.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  ins_cost(500);
  match(Set cr (CmpD src1 src2));

  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14472 
// Double compare of two registers into rFlagsRegUCF.
// A bare ucomisd with no fixup sequence (cost 100).
// The listing text mirrors the float variant cmpF_cc_regCF: mnemonic and
// operands only, without the stray trailing " test" word.
instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14483 
// Double compare of two registers into rFlagsRegUCFE.
// Emits a single evucomxsd (cost 100).
// The listing text mirrors the float variant cmpF_cc_regCFE: mnemonic and
// operands only, without the stray trailing " test" word.
instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14494 
// Double compare of a register with a double loaded from memory, result in
// rFlagsRegUCF. A bare ucomisd with a memory operand (cost 100).
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  ins_cost(100);
  match(Set cr (CmpD src1 (LoadD src2)));

  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14505 
// Double compare of a register with a double loaded from memory, result in
// rFlagsRegUCFE. Emits a single evucomxsd with a memory operand (cost 100).
instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2)
%{
  ins_cost(100);
  match(Set cr (CmpD src1 (LoadD src2)));

  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    __ evucomxsd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14516 
// Double compare of a register with a double constant, result in
// rFlagsRegUCF. The constant is addressed through the constant table
// (cost 100).
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  ins_cost(100);
  match(Set cr (CmpD src con));

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14526 
// Double compare of a register with a double constant, result in
// rFlagsRegUCFE. Emits evucomxsd against the constant-table address of con
// (cost 100).
instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con)
%{
  ins_cost(100);
  match(Set cr (CmpD src con));

  format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister lhs = $src$$XMMRegister;
    __ evucomxsd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14537 
// Three-way float compare of two registers: compare into -1,0,1.
// ucomiss sets the flags, then emit_cmpfp3 materializes the integer result
// in dst; the format text lists the emitted sequence. Flags are killed.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  ins_cost(275);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14558 
// Three-way float compare of a register with a float loaded from memory:
// compare into -1,0,1. ucomiss sets the flags, then emit_cmpfp3
// materializes the integer result in dst. Flags are killed.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  ins_cost(275);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    __ ucomiss(lhs, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14579 
// Three-way float compare of a register with a float constant:
// compare into -1,0,1. The constant is addressed through the constant
// table; emit_cmpfp3 materializes the integer result in dst.
// Flags are killed.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  ins_cost(275);
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14599 
// Three-way double compare of two registers: compare into -1,0,1.
// ucomisd sets the flags, then emit_cmpfp3 materializes the integer result
// in dst; the format text lists the emitted sequence. Flags are killed.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  ins_cost(275);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14620 
// Three-way double compare of a register with a double loaded from memory:
// compare into -1,0,1. ucomisd sets the flags, then emit_cmpfp3
// materializes the integer result in dst. Flags are killed.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  ins_cost(275);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14641 
// Three-way double compare of a register with a double constant:
// compare into -1,0,1. The constant is addressed through the constant
// table; emit_cmpfp3 materializes the integer result in dst.
// Flags are killed.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  ins_cost(275);
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
            "done:" %}
  ins_encode %{
    const XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14661 
14662 //----------Arithmetic Conversion Instructions---------------------------------
14663 
// Convert float to double, register to register, via cvtss2sd.
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    const XMMRegister widened = $dst$$XMMRegister;
    const XMMRegister value   = $src$$XMMRegister;
    __ cvtss2sd (widened, value);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14674 
// Convert a float loaded from memory to double via cvtss2sd with a memory
// operand. Selected only when UseAVX == 0.
instruct convF2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvF2D (LoadF src)));
  predicate(UseAVX == 0);

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    const XMMRegister widened = $dst$$XMMRegister;
    __ cvtss2sd (widened, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14686 
// Convert double to float, register to register, via cvtsd2ss.
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    const XMMRegister narrowed = $dst$$XMMRegister;
    const XMMRegister value    = $src$$XMMRegister;
    __ cvtsd2ss (narrowed, value);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14697 
// ConvD2F with the double loaded straight from memory (LoadD folded into the
// conversion). Only selected when UseAVX == 0.
instruct convD2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    const XMMRegister dst_xmm  = $dst$$XMMRegister;
    const Address     src_addr = $src$$Address;
    __ cvtsd2ss(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14709 
14710 // XXX do mem variants
// ConvF2I when AVX10.2 is not supported: delegates to the convertF2I macro
// with the (T_INT, T_FLOAT) type pair. Flags are killed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);

  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    const Register    int_dst = $dst$$Register;
    const XMMRegister flt_src = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, int_dst, flt_src);
  %}
  ins_pipe(pipe_slow);
%}
14722 
// ConvF2I when AVX10.2 is supported: a single evcvttss2sisl, register source.
// No flags operand is declared for this variant.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));

  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    const Register    int_dst = $dst$$Register;
    const XMMRegister flt_src = $src$$XMMRegister;
    __ evcvttss2sisl(int_dst, flt_src);
  %}
  ins_pipe(pipe_slow);
%}
14733 
// ConvF2I when AVX10.2 is supported, with the float read directly from
// memory (LoadF folded into the conversion).
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));

  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    const Register int_dst  = $dst$$Register;
    const Address  src_addr = $src$$Address;
    __ evcvttss2sisl(int_dst, src_addr);
  %}
  ins_pipe(pipe_slow);
%}
14744 
// ConvF2L when AVX10.2 is not supported: delegates to the convertF2I macro
// with the (T_LONG, T_FLOAT) type pair. Flags are killed.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "convert_f2l $dst, $src" %}
  ins_encode %{
    const Register    long_dst = $dst$$Register;
    const XMMRegister flt_src  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, long_dst, flt_src);
  %}
  ins_pipe(pipe_slow);
%}
14756 
// ConvF2L when AVX10.2 is supported: a single evcvttss2sisq, register source.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));

  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    const Register    long_dst = $dst$$Register;
    const XMMRegister flt_src  = $src$$XMMRegister;
    __ evcvttss2sisq(long_dst, flt_src);
  %}
  ins_pipe(pipe_slow);
%}
14767 
// ConvF2L when AVX10.2 is supported, with the float read directly from
// memory (LoadF folded into the conversion).
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));

  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    const Register long_dst = $dst$$Register;
    const Address  src_addr = $src$$Address;
    __ evcvttss2sisq(long_dst, src_addr);
  %}
  ins_pipe(pipe_slow);
%}
14778 
// ConvD2I when AVX10.2 is not supported: delegates to the convertF2I macro
// with the (T_INT, T_DOUBLE) type pair. Flags are killed.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);

  format %{ "convert_d2i $dst, $src" %}
  ins_encode %{
    const Register    int_dst = $dst$$Register;
    const XMMRegister dbl_src = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, int_dst, dbl_src);
  %}
  ins_pipe(pipe_slow);
%}
14790 
// ConvD2I when AVX10.2 is supported: a single evcvttsd2sisl, register source.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));

  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    const Register    int_dst = $dst$$Register;
    const XMMRegister dbl_src = $src$$XMMRegister;
    __ evcvttsd2sisl(int_dst, dbl_src);
  %}
  ins_pipe(pipe_slow);
%}
14801 
// ConvD2I when AVX10.2 is supported, with the double read directly from
// memory (LoadD folded into the conversion).
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));

  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    const Register int_dst  = $dst$$Register;
    const Address  src_addr = $src$$Address;
    __ evcvttsd2sisl(int_dst, src_addr);
  %}
  ins_pipe(pipe_slow);
%}
14812 
// ConvD2L when AVX10.2 is not supported: delegates to the convertF2I macro
// with the (T_LONG, T_DOUBLE) type pair. Flags are killed.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);

  format %{ "convert_d2l $dst, $src" %}
  ins_encode %{
    const Register    long_dst = $dst$$Register;
    const XMMRegister dbl_src  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, long_dst, dbl_src);
  %}
  ins_pipe(pipe_slow);
%}
14824 
// ConvD2L when AVX10.2 is supported: a single evcvttsd2sisq, register source.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));

  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    const Register    long_dst = $dst$$Register;
    const XMMRegister dbl_src  = $src$$XMMRegister;
    __ evcvttsd2sisq(long_dst, dbl_src);
  %}
  ins_pipe(pipe_slow);
%}
14835 
// ConvD2L when AVX10.2 is supported, with the double read directly from
// memory (LoadD folded into the conversion).
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));

  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    const Register long_dst = $dst$$Register;
    const Address  src_addr = $src$$Address;
    __ evcvttsd2sisq(long_dst, src_addr);
  %}
  ins_pipe(pipe_slow);
%}
14846 
// RoundD: double to long via the round_double macro.
// dst, rtmp and rcx are all declared TEMP and the flags are killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);

  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    const Register    rounded  = $dst$$Register;
    const XMMRegister value    = $src$$XMMRegister;
    const Register    scratch  = $rtmp$$Register;
    const Register    rcx_temp = $rcx$$Register;
    __ round_double(rounded, value, scratch, rcx_temp);
  %}
  ins_pipe(pipe_slow);
%}
14857 
// RoundF: float to int via the round_float macro.
// dst, rtmp and rcx are all declared TEMP and the flags are killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);

  format %{ "round_float $dst,$src" %}
  ins_encode %{
    const Register    rounded  = $dst$$Register;
    const XMMRegister value    = $src$$XMMRegister;
    const Register    scratch  = $rtmp$$Register;
    const Register    rcx_temp = $rcx$$Register;
    __ round_float(rounded, value, scratch, rcx_temp);
  %}
  ins_pipe(pipe_slow);
%}
14868 
// ConvI2F through cvtsi2ssl, used when UseXmmI2F is off.
instruct convI2F_reg_reg(vlRegF dst, rRegI src)
%{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // dst is cleared before the conversion on AVX targets.
      // NOTE(review): presumably to avoid a dependency on the previous
      // contents of dst -- confirm.
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2ssl(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14883 
// ConvI2F with the int read directly from memory (LoadI folded into the
// conversion). Only selected when UseAVX == 0.
instruct convI2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    const XMMRegister dst_xmm  = $dst$$XMMRegister;
    const Address     src_addr = $src$$Address;
    __ cvtsi2ssl(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14895 
// ConvI2D through cvtsi2sdl, used when UseXmmI2D is off.
instruct convI2D_reg_reg(vlRegD dst, rRegI src)
%{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // dst is cleared before the conversion on AVX targets.
      // NOTE(review): presumably to avoid a dependency on the previous
      // contents of dst -- confirm.
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2sdl(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14910 
// ConvI2D with the int read directly from memory (LoadI folded into the
// conversion). Only selected when UseAVX == 0.
instruct convI2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    const XMMRegister dst_xmm  = $dst$$XMMRegister;
    const Address     src_addr = $src$$Address;
    __ cvtsi2sdl(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14922 
// ConvI2F variant used when UseXmmI2F is on: the int is first moved into the
// XMM destination (movdl) and then converted in place (cvtdq2ps).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdl(dst_xmm, $src$$Register);
    __ cvtdq2ps(dst_xmm, dst_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14936 
// ConvI2D variant used when UseXmmI2D is on: the int is first moved into the
// XMM destination (movdl) and then converted in place (cvtdq2pd).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdl(dst_xmm, $src$$Register);
    __ cvtdq2pd(dst_xmm, dst_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14950 
// ConvL2F with a register source, through cvtsi2ssq.
instruct convL2F_reg_reg(vlRegF dst, rRegL src)
%{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // dst is cleared before the conversion on AVX targets.
      // NOTE(review): presumably to avoid a dependency on the previous
      // contents of dst -- confirm.
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2ssq(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14964 
// ConvL2F with the long read directly from memory (LoadL folded into the
// conversion). Only selected when UseAVX == 0.
instruct convL2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    const XMMRegister dst_xmm  = $dst$$XMMRegister;
    const Address     src_addr = $src$$Address;
    __ cvtsi2ssq(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14976 
// ConvL2D with a register source, through cvtsi2sdq.
instruct convL2D_reg_reg(vlRegD dst, rRegL src)
%{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // dst is cleared before the conversion on AVX targets.
      // NOTE(review): presumably to avoid a dependency on the previous
      // contents of dst -- confirm.
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2sdq(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14990 
// ConvL2D with the long read directly from memory (LoadL folded into the
// conversion). Only selected when UseAVX == 0.
instruct convL2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    const XMMRegister dst_xmm  = $dst$$XMMRegister;
    const Address     src_addr = $src$$Address;
    __ cvtsi2sdq(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15002 
// ConvI2L: int to long with movslq (register to register).
instruct convI2L_reg_reg(rRegL dst, rRegI src)
%{
  match(Set dst (ConvI2L src));
  ins_cost(125);

  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    const Register long_dst = $dst$$Register;
    const Register int_src  = $src$$Register;
    __ movslq(long_dst, int_src);
  %}
  ins_pipe(ialu_reg_reg);
%}
15014 
// Zero-extend convert int to long.
// Matches (ConvI2L src) AND-ed with an immL_32bits mask and implements the
// whole pattern with one 32-bit movl. The move is skipped when dst and src
// were assigned the same register.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  // Single-line format: no trailing line continuation, matching the sibling
  // zero-extend rule zerox_long_reg_reg.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
15028 
// Zero-extend convert int to long, memory source.
// Matches (ConvI2L (LoadI src)) AND-ed with an immL_32bits mask and
// implements the whole pattern with one 32-bit movl from memory.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  // Single-line format: no trailing line continuation, matching the sibling
  // zero-extend rule zerox_long_reg_reg.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
15040 
// AndL of a long register with an immL_32bits mask, implemented as a single
// 32-bit movl (the format labels it "zero-extend long").
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ movl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
15051 
// ConvL2I: long to int with a 32-bit movl (register to register).
instruct convL2I_reg_reg(rRegI dst, rRegL src)
%{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    const Register int_dst  = $dst$$Register;
    const Register long_src = $src$$Register;
    __ movl(int_dst, long_src);
  %}
  ins_pipe(ialu_reg_reg);
%}
15062 
15063 
// MoveF2I, stack slot to general register: 32-bit load from [rsp + disp].
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  ins_cost(125);
  effect(DEF dst, USE src);

  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
15075 
// MoveI2F, stack slot to XMM register: movflt from [rsp + disp].
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  ins_cost(125);
  effect(DEF dst, USE src);

  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15087 
// MoveD2L, stack slot to general register: 64-bit load from [rsp + disp].
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  ins_cost(125);
  effect(DEF dst, USE src);

  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
15099 
// MoveL2D, stack slot to XMM register, selected when
// UseXmmLoadAndClearUpper is off (the format prints it as movlpd).
// The encoding itself is a movdbl from [rsp + disp].
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  ins_cost(125);
  effect(DEF dst, USE src);

  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15112 
// MoveL2D, stack slot to XMM register, selected when
// UseXmmLoadAndClearUpper is on (the format prints it as movsd).
// The encoding itself is a movdbl from [rsp + disp].
instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  ins_cost(125);
  effect(DEF dst, USE src);

  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15125 
15126 
// MoveF2I, XMM register to stack slot: movflt store to [rsp + disp].
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  ins_cost(95); // XXX
  effect(DEF dst, USE src);

  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15138 
// MoveI2F, general register to stack slot: 32-bit store to [rsp + disp].
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  ins_cost(100);
  effect(DEF dst, USE src);

  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
15150 
// MoveD2L, XMM register to stack slot: movdbl store to [rsp + disp].
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The trailing tag names this rule; it previously read MoveL2D_reg_stack,
  // which is a different instruct in this file.
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15162 
// MoveL2D, general register to stack slot: 64-bit store to [rsp + disp].
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  ins_cost(100);
  effect(DEF dst, USE src);

  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15174 
// MoveF2I, XMM register to general register, via movdl.
instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
  match(Set dst (MoveF2I src));
  ins_cost(85);
  effect(DEF dst, USE src);

  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    const Register    gpr = $dst$$Register;
    const XMMRegister xmm = $src$$XMMRegister;
    __ movdl(gpr, xmm);
  %}
  ins_pipe( pipe_slow );
%}
15185 
// MoveD2L, XMM register to general register, via movdq.
instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  ins_cost(85);
  effect(DEF dst, USE src);

  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    const Register    gpr = $dst$$Register;
    const XMMRegister xmm = $src$$XMMRegister;
    __ movdq(gpr, xmm);
  %}
  ins_pipe( pipe_slow );
%}
15196 
// MoveI2F, general register to XMM register, via movdl.
instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  ins_cost(100);
  effect(DEF dst, USE src);

  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    const XMMRegister xmm = $dst$$XMMRegister;
    const Register    gpr = $src$$Register;
    __ movdl(xmm, gpr);
  %}
  ins_pipe( pipe_slow );
%}
15207 
// MoveL2D, general register to XMM register, via movdq.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  ins_cost(100);
  effect(DEF dst, USE src);

  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    const XMMRegister xmm = $dst$$XMMRegister;
    const Register    gpr = $src$$Register;
    __ movdq(xmm, gpr);
  %}
  ins_pipe( pipe_slow );
%}
15218 
// Fast clearing of an array.
// Small non-constant length ClearArray for non-AVX512 targets (UseAVX <= 2).
// cnt (rcx) and base (rdi) are consumed and killed, rax serves as the zero
// register, tmp is an XMM temporary; the actual code comes from clear_mem.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // Small variant: the boolean argument is false and no opmask register
    // is supplied (knoreg).
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15279 
// Small non-constant length ClearArray for AVX512 targets (UseAVX > 2).
// Same operands as the non-AVX512 rule plus an opmask temporary (ktmp) that
// is handed to clear_mem.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  ins_cost(125);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    // Small variant: the boolean argument is false.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, false, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15340 
// Large non-constant length ClearArray for non-AVX512 targets (UseAVX <= 2).
// Selected when the ClearArrayNode reports is_large(); clear_mem is called
// with its boolean argument set to true and without an opmask register.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // Large variant: the boolean argument is true; no opmask register (knoreg).
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15391 
// Large non-constant length ClearArray for AVX512 targets (UseAVX > 2).
// Selected when the ClearArrayNode reports is_large(); clear_mem is called
// with its boolean argument set to true and with the ktmp opmask temporary.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    // Large variant: the boolean argument is true.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, true, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15442 
// Small constant length ClearArray for AVX512 targets.
// The count is an immediate (immL) and is passed to clear_mem as a constant.
// Selected only for non-large ClearArray nodes when MaxVectorSize >= 32 and
// AVX512VL is supported. tmp, zero and ktmp are temporaries; flags are killed.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  // Single-line format: no trailing whitespace or line continuation.
  format %{ "clear_mem_imm $base , $cnt" %}
  ins_encode %{
   __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15456 
// StrComp for encoding StrIntrinsicNode::LL, used when
// VM_Version::supports_avx512vlbw() is false (knoreg is passed as the mask).
// Both string pointers and both counts are consumed and killed.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register first      = $str1$$Register;
    const Register second     = $str2$$Register;
    const Register first_len  = $cnt1$$Register;
    const Register second_len = $cnt2$$Register;
    __ string_compare(first, second, first_len, second_len, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15472 
// StrComp for encoding StrIntrinsicNode::LL, used when
// VM_Version::supports_avx512vlbw() is true; an opmask temporary (ktmp) is
// passed to string_compare. Both strings and both counts are killed.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register  first      = $str1$$Register;
    const Register  second     = $str2$$Register;
    const Register  first_len  = $cnt1$$Register;
    const Register  second_len = $cnt2$$Register;
    const KRegister mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first, second, first_len, second_len, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15488 
// StrComp for encoding StrIntrinsicNode::UU, used when
// VM_Version::supports_avx512vlbw() is false (knoreg is passed as the mask).
// Both string pointers and both counts are consumed and killed.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register first      = $str1$$Register;
    const Register second     = $str2$$Register;
    const Register first_len  = $cnt1$$Register;
    const Register second_len = $cnt2$$Register;
    __ string_compare(first, second, first_len, second_len, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15504 
// StrComp for encoding StrIntrinsicNode::UU, used when
// VM_Version::supports_avx512vlbw() is true; an opmask temporary (ktmp) is
// passed to string_compare. Both strings and both counts are killed.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register  first      = $str1$$Register;
    const Register  second     = $str2$$Register;
    const Register  first_len  = $cnt1$$Register;
    const Register  second_len = $cnt2$$Register;
    const KRegister mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first, second, first_len, second_len, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15520 
// StrComp for encoding StrIntrinsicNode::LU, used when
// VM_Version::supports_avx512vlbw() is false (knoreg is passed as the mask).
// Both string pointers and both counts are consumed and killed.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register first      = $str1$$Register;
    const Register second     = $str2$$Register;
    const Register first_len  = $cnt1$$Register;
    const Register second_len = $cnt2$$Register;
    __ string_compare(first, second, first_len, second_len, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15536 
// StrComp for encoding StrIntrinsicNode::LU, used when
// VM_Version::supports_avx512vlbw() is true; an opmask temporary (ktmp) is
// passed to string_compare. Both strings and both counts are killed.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register  first      = $str1$$Register;
    const Register  second     = $str2$$Register;
    const Register  first_len  = $cnt1$$Register;
    const Register  second_len = $cnt2$$Register;
    const KRegister mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first, second, first_len, second_len, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15552 
// StrComp for encoding StrIntrinsicNode::UL, used when
// VM_Version::supports_avx512vlbw() is false (knoreg is passed as the mask).
// Unlike the LL/UU/LU rules, str1/cnt1 are bound to rsi/rdx and str2/cnt2 to
// rdi/rcx, and string_compare receives the second string first.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register first      = $str1$$Register;
    const Register second     = $str2$$Register;
    const Register first_len  = $cnt1$$Register;
    const Register second_len = $cnt2$$Register;
    // Arguments are deliberately passed in swapped order: str2/cnt2 first.
    __ string_compare(second, first, second_len, first_len, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15568 
15569 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15570                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15571 %{
        // Selected for StrComp nodes whose encoding is StrIntrinsicNode::UL when
        // VM_Version::supports_avx512vlbw() is true. Adds a kReg TEMP that is
        // passed as the last argument of string_compare.
15572   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15573   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15574   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15575 
15576   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15577   ins_encode %{
          // As in the non-evex UL rule, str2/cnt2 are passed before str1/cnt1.
15578     __ string_compare($str2$$Register, $str1$$Register,
15579                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15580                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15581   %}
15582   ins_pipe( pipe_slow );
15583 %}
15584 
15585 // fast search of substring with known size.
15586 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15587                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15588 %{
        // StrIndexOf with a constant substring length (immI int_cnt2), encoding
        // LL, only when UseSSE42Intrinsics is set. cnt2 is not part of the match
        // rule; it is declared KILL and handed to the assembler routine.
15589   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15590   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15591   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15592 
15593   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15594   ins_encode %{
          // The constant length picks the routine: 16 or more elements use
          // string_indexofC8, anything shorter uses string_indexof.
15595     int icnt2 = (int)$int_cnt2$$constant;
15596     if (icnt2 >= 16) {
15597       // IndexOf for constant substrings with size >= 16 elements
15598       // which don't need to be loaded through stack.
15599       __ string_indexofC8($str1$$Register, $str2$$Register,
15600                           $cnt1$$Register, $cnt2$$Register,
15601                           icnt2, $result$$Register,
15602                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15603     } else {
15604       // Small strings are loaded through stack if they cross page boundary.
15605       __ string_indexof($str1$$Register, $str2$$Register,
15606                         $cnt1$$Register, $cnt2$$Register,
15607                         icnt2, $result$$Register,
15608                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15609     }
15610   %}
15611   ins_pipe( pipe_slow );
15612 %}
15613 
15614 // fast search of substring with known size.
15615 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15616                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15617 %{
        // StrIndexOf with a constant substring length (immI int_cnt2), encoding
        // UU, only when UseSSE42Intrinsics is set. cnt2 is not part of the match
        // rule; it is declared KILL and handed to the assembler routine.
15618   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15619   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15620   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15621 
15622   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15623   ins_encode %{
          // The threshold is 8 elements here (the LL rule uses 16).
15624     int icnt2 = (int)$int_cnt2$$constant;
15625     if (icnt2 >= 8) {
15626       // IndexOf for constant substrings with size >= 8 elements
15627       // which don't need to be loaded through stack.
15628       __ string_indexofC8($str1$$Register, $str2$$Register,
15629                           $cnt1$$Register, $cnt2$$Register,
15630                           icnt2, $result$$Register,
15631                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15632     } else {
15633       // Small strings are loaded through stack if they cross page boundary.
15634       __ string_indexof($str1$$Register, $str2$$Register,
15635                         $cnt1$$Register, $cnt2$$Register,
15636                         icnt2, $result$$Register,
15637                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15638     }
15639   %}
15640   ins_pipe( pipe_slow );
15641 %}
15642 
15643 // fast search of substring with known size.
15644 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15645                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15646 %{
        // StrIndexOf with a constant substring length (immI int_cnt2), encoding
        // UL, only when UseSSE42Intrinsics is set. cnt2 is not part of the match
        // rule; it is declared KILL and handed to the assembler routine.
15647   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15648   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15649   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15650 
15651   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15652   ins_encode %{
          // Same 8-element threshold as the UU rule.
15653     int icnt2 = (int)$int_cnt2$$constant;
15654     if (icnt2 >= 8) {
15655       // IndexOf for constant substrings with size >= 8 elements
15656       // which don't need to be loaded through stack.
15657       __ string_indexofC8($str1$$Register, $str2$$Register,
15658                           $cnt1$$Register, $cnt2$$Register,
15659                           icnt2, $result$$Register,
15660                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15661     } else {
15662       // Small strings are loaded through stack if they cross page boundary.
15663       __ string_indexof($str1$$Register, $str2$$Register,
15664                         $cnt1$$Register, $cnt2$$Register,
15665                         icnt2, $result$$Register,
15666                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15667     }
15668   %}
15669   ins_pipe( pipe_slow );
15670 %}
15671 
15672 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15673                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15674 %{
        // StrIndexOf with a register substring length, encoding LL, only when
        // UseSSE42Intrinsics is set. Here cnt2 is a real input (USE_KILL).
15675   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15676   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15677   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15678 
15679   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15680   ins_encode %{
          // -1 occupies the argument slot where the _con rules pass the
          // compile-time substring length.
15681     __ string_indexof($str1$$Register, $str2$$Register,
15682                       $cnt1$$Register, $cnt2$$Register,
15683                       (-1), $result$$Register,
15684                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15685   %}
15686   ins_pipe( pipe_slow );
15687 %}
15688 
15689 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15690                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15691 %{
        // StrIndexOf with a register substring length, encoding UU, only when
        // UseSSE42Intrinsics is set. Here cnt2 is a real input (USE_KILL).
15692   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15693   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15694   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15695 
15696   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15697   ins_encode %{
          // -1 occupies the argument slot where the _con rules pass the
          // compile-time substring length.
15698     __ string_indexof($str1$$Register, $str2$$Register,
15699                       $cnt1$$Register, $cnt2$$Register,
15700                       (-1), $result$$Register,
15701                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15702   %}
15703   ins_pipe( pipe_slow );
15704 %}
15705 
15706 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15707                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15708 %{
        // StrIndexOf with a register substring length, encoding UL, only when
        // UseSSE42Intrinsics is set. Here cnt2 is a real input (USE_KILL).
15709   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15710   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15711   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15712 
15713   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15714   ins_encode %{
          // -1 occupies the argument slot where the _con rules pass the
          // compile-time substring length.
15715     __ string_indexof($str1$$Register, $str2$$Register,
15716                       $cnt1$$Register, $cnt2$$Register,
15717                       (-1), $result$$Register,
15718                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15719   %}
15720   ins_pipe( pipe_slow );
15721 %}
15722 
15723 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15724                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15725 %{
        // StrIndexOfChar for encoding StrIntrinsicNode::U, only when
        // UseSSE42Intrinsics is set. str1, cnt1 and ch are USE_KILL; the three
        // vector operands and tmp are TEMPs; flags are killed.
15726   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15727   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15728   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15729   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15730   ins_encode %{
15731     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15732                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15733   %}
15734   ins_pipe( pipe_slow );
15735 %}
15736 
15737 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15738                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15739 %{
        // StrIndexOfChar for encoding StrIntrinsicNode::L, only when
        // UseSSE42Intrinsics is set. Same operands and effects as
        // string_indexof_char, but emits stringL_indexof_char.
15740   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15741   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15742   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15743   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15744   ins_encode %{
15745     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15746                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15747   %}
15748   ins_pipe( pipe_slow );
15749 %}
15750 
15751 // fast string equals
15752 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15753                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15754 %{
        // StrEquals when VM_Version::supports_avx512vlbw() is false. str1, str2
        // and cnt are USE_KILL; tmp1/tmp2 are TEMPs; tmp3 and flags are killed.
15755   predicate(!VM_Version::supports_avx512vlbw());
15756   match(Set result (StrEquals (Binary str1 str2) cnt));
15757   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15758 
15759   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15760   ins_encode %{
          // First argument is false here; the AryEq rules in this file call the
          // same arrays_equals routine with true.
15761     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15762                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15763                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15764   %}
15765   ins_pipe( pipe_slow );
15766 %}
15767 
15768 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15769                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15770 %{
        // StrEquals when VM_Version::supports_avx512vlbw() is true. Adds a kReg
        // TEMP that replaces knoreg as the last arrays_equals argument.
15771   predicate(VM_Version::supports_avx512vlbw());
15772   match(Set result (StrEquals (Binary str1 str2) cnt));
15773   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15774 
15775   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15776   ins_encode %{
15777     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15778                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15779                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15780   %}
15781   ins_pipe( pipe_slow );
15782 %}
15783 
15784 // fast array equals
15785 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15786                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15787 %{
        // AryEq for encoding StrIntrinsicNode::LL when
        // VM_Version::supports_avx512vlbw() is false. There is no length input;
        // tmp3 fills the slot where string_equals passes its cnt operand.
15788   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15789   match(Set result (AryEq ary1 ary2));
15790   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15791 
15792   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15793   ins_encode %{
          // First argument true, char flag false (byte elements).
15794     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15795                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15796                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15797   %}
15798   ins_pipe( pipe_slow );
15799 %}
15800 
15801 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15802                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15803 %{
        // AryEq for encoding StrIntrinsicNode::LL when
        // VM_Version::supports_avx512vlbw() is true. Adds a kReg TEMP that
        // replaces knoreg as the last arrays_equals argument.
15804   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15805   match(Set result (AryEq ary1 ary2));
15806   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15807 
15808   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15809   ins_encode %{
15810     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15811                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15812                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15813   %}
15814   ins_pipe( pipe_slow );
15815 %}
15816 
15817 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15818                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15819 %{
        // AryEq for encoding StrIntrinsicNode::UU when
        // VM_Version::supports_avx512vlbw() is false. Differs from array_equalsB
        // only in the encoding tested and the char flag passed.
15820   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15821   match(Set result (AryEq ary1 ary2));
15822   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15823 
15824   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15825   ins_encode %{
          // First argument true, char flag true (char elements).
15826     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15827                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15828                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15829   %}
15830   ins_pipe( pipe_slow );
15831 %}
15832 
15833 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15834                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15835 %{
        // AryEq for encoding StrIntrinsicNode::UU when
        // VM_Version::supports_avx512vlbw() is true. Adds a kReg TEMP that
        // replaces knoreg as the last arrays_equals argument.
15836   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15837   match(Set result (AryEq ary1 ary2));
15838   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15839 
15840   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15841   ins_encode %{
15842     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15843                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15844                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15845   %}
15846   ins_pipe( pipe_slow );
15847 %}
15848 
15849 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15850                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15851                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15852                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15853                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15854 %{
        // VectorizedHashCode, only when UseAVX >= 2. result appears on both
        // sides of the match rule: it is an input of the node and also the
        // destination. Thirteen vector TEMPs and three integer TEMPs are
        // reserved; ary1 and cnt1 are USE_KILL.
15855   predicate(UseAVX >= 2);
15856   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15857   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15858          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15859          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15860          USE basic_type, KILL cr);
15861 
15862   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15863   ins_encode %{
          // The immU8 constant is cast to BasicType for the assembler routine.
15864     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15865                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15866                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15867                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15868                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15869                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15870                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15871   %}
15872   ins_pipe( pipe_slow );
15873 %}
15874 
15875 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15876                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15877 %{
        // CountPositives when avx512vlbw or bmi2 is unavailable (the _evex rule
        // requires both). ary1 and len are USE_KILL; tmp1/tmp2 are TEMPs; tmp3
        // and the flags are killed.
15878   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15879   match(Set result (CountPositives ary1 len));
15880   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15881 
15882   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15883   ins_encode %{
          // knoreg fills both trailing k-register arguments.
15884     __ count_positives($ary1$$Register, $len$$Register,
15885                        $result$$Register, $tmp3$$Register,
15886                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15887   %}
15888   ins_pipe( pipe_slow );
15889 %}
15890 
15891 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15892                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15893 %{
        // CountPositives when both avx512vlbw and bmi2 are supported. Same
        // operands as count_positives plus two kReg TEMPs that are passed as
        // the trailing arguments of the assembler routine.
15894   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15895   match(Set result (CountPositives ary1 len));
15896   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15897 
15898   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15899   ins_encode %{
15900     __ count_positives($ary1$$Register, $len$$Register,
15901                        $result$$Register, $tmp3$$Register,
15902                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15903   %}
15904   ins_pipe( pipe_slow );
15905 %}
15906 
15907 // fast char[] to byte[] compression
15908 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15909                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // StrCompressedCopy when avx512vlbw or bmi2 is unavailable. src, dst and
        // len are USE_KILL; tmp1..tmp4 are vector TEMPs; tmp5 and the flags are
        // killed.
15910   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15911   match(Set result (StrCompressedCopy src (Binary dst len)));
15912   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15913          USE_KILL len, KILL tmp5, KILL cr);
15914 
15915   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15916   ins_encode %{
          // knoreg fills both trailing k-register arguments.
15917     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15918                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15919                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15920                            knoreg, knoreg);
15921   %}
15922   ins_pipe( pipe_slow );
15923 %}
15924 
15925 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15926                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // StrCompressedCopy when both avx512vlbw and bmi2 are supported. Same
        // operands as string_compress plus two kReg TEMPs that are passed as
        // the trailing arguments of char_array_compress.
15927   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15928   match(Set result (StrCompressedCopy src (Binary dst len)));
15929   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15930          USE_KILL len, KILL tmp5, KILL cr);
15931 
15932   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15933   ins_encode %{
15934     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15935                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15936                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15937                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15938   %}
15939   ins_pipe( pipe_slow );
15940 %}
15941 // fast byte[] to char[] inflation
15942 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15943                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
        // StrInflatedCopy when avx512vlbw or bmi2 is unavailable. The match rule
        // sets a Universe dummy operand rather than a register result. src, dst
        // and len are USE_KILL; tmp1 and tmp2 are TEMPs; flags are killed.
15944   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15945   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15946   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15947 
15948   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15949   ins_encode %{
          // knoreg fills the trailing k-register argument.
15950     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15951                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15952   %}
15953   ins_pipe( pipe_slow );
15954 %}
15955 
15956 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15957                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
        // StrInflatedCopy when both avx512vlbw and bmi2 are supported. Same
        // operands as string_inflate plus a kReg TEMP that is passed as the
        // last argument of byte_array_inflate.
15958   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15959   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15960   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15961 
15962   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15963   ins_encode %{
15964     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15965                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15966   %}
15967   ins_pipe( pipe_slow );
15968 %}
15969 
15970 // encode char[] to byte[] in ISO_8859_1
15971 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15972                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15973                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // EncodeISOArray nodes for which is_ascii() is false. src, dst and len
        // are USE_KILL; tmp1..tmp4 are vector TEMPs; tmp5 and flags are killed.
15974   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15975   match(Set result (EncodeISOArray src (Binary dst len)));
15976   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15977 
15978   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15979   ins_encode %{
          // The final false argument is what separates this rule from
          // encode_ascii_array, which passes true to the same routine.
15980     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15981                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15982                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15983   %}
15984   ins_pipe( pipe_slow );
15985 %}
15986 
15987 // encode char[] to byte[] in ASCII
15988 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15989                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15990                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // EncodeISOArray nodes for which is_ascii() is true. Operands and
        // effects are identical to encode_iso_array.
15991   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15992   match(Set result (EncodeISOArray src (Binary dst len)));
15993   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15994 
15995   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15996   ins_encode %{
          // Same assembler routine as the ISO rule, with the final flag true.
15997     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15998                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15999                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16000   %}
16001   ins_pipe( pipe_slow );
16002 %}
16003 
16004 //----------Overflow Math Instructions-----------------------------------------
16005 
16006 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16007 %{
        // OverflowAddI of two registers: the only result is the flags (DEF cr).
        // addl is emitted with op1 as its first operand, so op1 is declared
        // USE_KILL; op2 is only read.
16008   match(Set cr (OverflowAddI op1 op2));
16009   effect(DEF cr, USE_KILL op1, USE op2);
16010 
16011   format %{ "addl    $op1, $op2\t# overflow check int" %}
16012 
16013   ins_encode %{
16014     __ addl($op1$$Register, $op2$$Register);
16015   %}
16016   ins_pipe(ialu_reg_reg);
16017 %}
16018 
16019 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16020 %{
        // OverflowAddI of a register and an immI constant: the only result is
        // the flags (DEF cr). op1 is the addl destination and is USE_KILL.
16021   match(Set cr (OverflowAddI op1 op2));
16022   effect(DEF cr, USE_KILL op1, USE op2);
16023 
16024   format %{ "addl    $op1, $op2\t# overflow check int" %}
16025 
16026   ins_encode %{
16027     __ addl($op1$$Register, $op2$$constant);
16028   %}
16029   ins_pipe(ialu_reg_reg);
16030 %}
16031 
16032 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16033 %{
        // OverflowAddL of two registers: the only result is the flags (DEF cr).
        // addq is emitted with op1 as its first operand, so op1 is USE_KILL.
16034   match(Set cr (OverflowAddL op1 op2));
16035   effect(DEF cr, USE_KILL op1, USE op2);
16036 
16037   format %{ "addq    $op1, $op2\t# overflow check long" %}
16038   ins_encode %{
16039     __ addq($op1$$Register, $op2$$Register);
16040   %}
16041   ins_pipe(ialu_reg_reg);
16042 %}
16043 
16044 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16045 %{
        // OverflowAddL of a register and an immL32 constant: the only result is
        // the flags (DEF cr). op1 is the addq destination and is USE_KILL.
16046   match(Set cr (OverflowAddL op1 op2));
16047   effect(DEF cr, USE_KILL op1, USE op2);
16048 
16049   format %{ "addq    $op1, $op2\t# overflow check long" %}
16050   ins_encode %{
16051     __ addq($op1$$Register, $op2$$constant);
16052   %}
16053   ins_pipe(ialu_reg_reg);
16054 %}
16055 
16056 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16057 %{
        // OverflowSubI of two registers, emitted as cmpl. Unlike the add rules
        // there is no effect clause and op1 is not tied to a fixed register
        // class, since neither operand is declared killed.
16058   match(Set cr (OverflowSubI op1 op2));
16059 
16060   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16061   ins_encode %{
16062     __ cmpl($op1$$Register, $op2$$Register);
16063   %}
16064   ins_pipe(ialu_reg_reg);
16065 %}
16066 
16067 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16068 %{
        // OverflowSubI of a register and an immI constant, emitted as cmpl.
        // No effect clause: neither operand is declared killed.
16069   match(Set cr (OverflowSubI op1 op2));
16070 
16071   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16072   ins_encode %{
16073     __ cmpl($op1$$Register, $op2$$constant);
16074   %}
16075   ins_pipe(ialu_reg_reg);
16076 %}
16077 
16078 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16079 %{
        // OverflowSubL of two registers, emitted as cmpq.
        // No effect clause: neither operand is declared killed.
16080   match(Set cr (OverflowSubL op1 op2));
16081 
16082   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16083   ins_encode %{
16084     __ cmpq($op1$$Register, $op2$$Register);
16085   %}
16086   ins_pipe(ialu_reg_reg);
16087 %}
16088 
16089 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16090 %{
        // OverflowSubL of a register and an immL32 constant, emitted as cmpq.
        // No effect clause: neither operand is declared killed.
16091   match(Set cr (OverflowSubL op1 op2));
16092 
16093   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16094   ins_encode %{
16095     __ cmpq($op1$$Register, $op2$$constant);
16096   %}
16097   ins_pipe(ialu_reg_reg);
16098 %}
16099 
16100 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16101 %{
        // OverflowSubI whose left operand is the immI_0 constant, i.e. the
        // overflow check for 0 - op2. Emitted as negl on op2, which is
        // therefore declared USE_KILL.
16102   match(Set cr (OverflowSubI zero op2));
16103   effect(DEF cr, USE_KILL op2);
16104 
16105   format %{ "negl    $op2\t# overflow check int" %}
16106   ins_encode %{
16107     __ negl($op2$$Register);
16108   %}
16109   ins_pipe(ialu_reg_reg);
16110 %}
16111 
16112 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16113 %{
        // OverflowSubL whose left operand is the immL0 constant, i.e. the
        // overflow check for 0 - op2. Emitted as negq on op2, which is
        // therefore declared USE_KILL.
16114   match(Set cr (OverflowSubL zero op2));
16115   effect(DEF cr, USE_KILL op2);
16116 
16117   format %{ "negq    $op2\t# overflow check long" %}
16118   ins_encode %{
16119     __ negq($op2$$Register);
16120   %}
16121   ins_pipe(ialu_reg_reg);
16122 %}
16123 
16124 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16125 %{
        // OverflowMulI of two registers: the only result is the flags (DEF cr).
        // The two-operand imull is emitted with op1 first, so op1 is USE_KILL.
16126   match(Set cr (OverflowMulI op1 op2));
16127   effect(DEF cr, USE_KILL op1, USE op2);
16128 
16129   format %{ "imull    $op1, $op2\t# overflow check int" %}
16130   ins_encode %{
16131     __ imull($op1$$Register, $op2$$Register);
16132   %}
16133   ins_pipe(ialu_reg_reg_alu0);
16134 %}
16135 
16136 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16137 %{
        // OverflowMulI of a register and an immI constant. The three-operand
        // imull is emitted with the TEMP tmp as its first operand, so op1 is
        // only a USE and needs no fixed register class.
16138   match(Set cr (OverflowMulI op1 op2));
16139   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16140 
16141   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16142   ins_encode %{
16143     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16144   %}
16145   ins_pipe(ialu_reg_reg_alu0);
16146 %}
16147 
16148 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16149 %{
        // OverflowMulL of two registers: the only result is the flags (DEF cr).
        // The two-operand imulq is emitted with op1 first, so op1 is USE_KILL.
16150   match(Set cr (OverflowMulL op1 op2));
16151   effect(DEF cr, USE_KILL op1, USE op2);
16152 
16153   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16154   ins_encode %{
16155     __ imulq($op1$$Register, $op2$$Register);
16156   %}
16157   ins_pipe(ialu_reg_reg_alu0);
16158 %}
16159 
16160 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16161 %{
        // OverflowMulL of a register and an immL32 constant. The three-operand
        // imulq is emitted with the TEMP tmp as its first operand, so op1 is
        // only a USE and needs no fixed register class.
16162   match(Set cr (OverflowMulL op1 op2));
16163   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16164 
16165   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16166   ins_encode %{
16167     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16168   %}
16169   ins_pipe(ialu_reg_reg_alu0);
16170 %}
16171 
16172 
16173 //----------Control Flow Instructions------------------------------------------
16174 // Signed compare Instructions
16175 
16176 // XXX more variants!!
16177 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16178 %{
        // CmpI of two int registers, emitted as cmpl. The flags are the only
        // result (DEF cr); both operands are only read.
16179   match(Set cr (CmpI op1 op2));
16180   effect(DEF cr, USE op1, USE op2);
16181 
16182   format %{ "cmpl    $op1, $op2" %}
16183   ins_encode %{
16184     __ cmpl($op1$$Register, $op2$$Register);
16185   %}
16186   ins_pipe(ialu_cr_reg_reg);
16187 %}
16188 
16189 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16190 %{
        // CmpI of an int register against an immI constant, emitted as cmpl.
16191   match(Set cr (CmpI op1 op2));
16192 
16193   format %{ "cmpl    $op1, $op2" %}
16194   ins_encode %{
16195     __ cmpl($op1$$Register, $op2$$constant);
16196   %}
16197   ins_pipe(ialu_cr_reg_imm);
16198 %}
16199 
16200 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16201 %{
        // CmpI of an int register against a LoadI: the load is folded into the
        // cmpl as a memory operand. Carries an explicit ins_cost of 500.
16202   match(Set cr (CmpI op1 (LoadI op2)));
16203 
16204   ins_cost(500); // XXX
16205   format %{ "cmpl    $op1, $op2" %}
16206   ins_encode %{
16207     __ cmpl($op1$$Register, $op2$$Address);
16208   %}
16209   ins_pipe(ialu_cr_reg_mem);
16210 %}
16211 
16212 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16213 %{
        // Compare of an int register against the constant zero. Emitted as testl
        // of the register with itself; the zero operand is only used for matching.
16214   match(Set cr (CmpI src zero));
16215 
16216   format %{ "testl   $src, $src" %}
16217   ins_encode %{
16218     __ testl($src$$Register, $src$$Register);
16219   %}
16220   ins_pipe(ialu_cr_reg_imm);
16221 %}
16222 
16223 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16224 %{
        // Matches (src AND con) compared with zero and emits a single testl with
        // an immediate, so the AndI result is never written to a register.
16225   match(Set cr (CmpI (AndI src con) zero));
16226 
16227   format %{ "testl   $src, $con" %}
16228   ins_encode %{
16229     __ testl($src$$Register, $con$$constant);
16230   %}
16231   ins_pipe(ialu_cr_reg_imm);
16232 %}
16233 
16234 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16235 %{
        // Matches (src1 AND src2) compared with zero and emits one testl of the
        // two registers; neither source register is modified.
16236   match(Set cr (CmpI (AndI src1 src2) zero));
16237 
16238   format %{ "testl   $src1, $src2" %}
16239   ins_encode %{
16240     __ testl($src1$$Register, $src2$$Register);
16241   %}
16242   ins_pipe(ialu_cr_reg_imm);
16243 %}
16244 
16245 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16246 %{
        // Matches (src AND int loaded from mem) compared with zero; both the
        // LoadI and the AndI are folded into a testl with a memory operand.
16247   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16248 
16249   format %{ "testl   $src, $mem" %}
16250   ins_encode %{
16251     __ testl($src$$Register, $mem$$Address);
16252   %}
16253   ins_pipe(ialu_cr_reg_mem);
16254 %}
16255 
16256 // Unsigned compare Instructions; really, same as signed except they
16257 // produce an rFlagsRegU instead of rFlagsReg.
16258 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16259 %{
        // Unsigned 32-bit register-register compare (CmpU). The emitted cmpl is
        // the same as for the signed rule; only the flags operand type
        // (rFlagsRegU) differs.
16260   match(Set cr (CmpU op1 op2));
16261 
16262   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16263   ins_encode %{
16264     __ cmpl($op1$$Register, $op2$$Register);
16265   %}
16266   ins_pipe(ialu_cr_reg_reg);
16267 %}
16268 
16269 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16270 %{
        // Unsigned 32-bit compare of a register against an int immediate.
16271   match(Set cr (CmpU op1 op2));
16272 
16273   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16274   ins_encode %{
16275     __ cmpl($op1$$Register, $op2$$constant);
16276   %}
16277   ins_pipe(ialu_cr_reg_imm);
16278 %}
16279 
16280 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16281 %{
        // Unsigned 32-bit compare of a register against an int in memory, with
        // the LoadI folded into the cmpl memory operand.
16282   match(Set cr (CmpU op1 (LoadI op2)));
16283 
16284   ins_cost(500); // XXX
16285   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16286   ins_encode %{
16287     __ cmpl($op1$$Register, $op2$$Address);
16288   %}
16289   ins_pipe(ialu_cr_reg_mem);
16290 %}
16291 
16292 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16293 %{
        // Unsigned compare of an int register against zero, emitted as testl of
        // the register with itself.
16294   match(Set cr (CmpU src zero));
16295 
16296   format %{ "testl   $src, $src\t# unsigned" %}
16297   ins_encode %{
16298     __ testl($src$$Register, $src$$Register);
16299   %}
16300   ins_pipe(ialu_cr_reg_imm);
16301 %}
16302 
16303 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16304 %{
        // Pointer compare (CmpP) of two registers using the 64-bit cmpq; the
        // result is produced in the unsigned flags class rFlagsRegU.
16305   match(Set cr (CmpP op1 op2));
16306 
16307   format %{ "cmpq    $op1, $op2\t# ptr" %}
16308   ins_encode %{
16309     __ cmpq($op1$$Register, $op2$$Register);
16310   %}
16311   ins_pipe(ialu_cr_reg_reg);
16312 %}
16313 
16314 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16315 %{
        // Pointer compare of a register against a pointer in memory, with the
        // LoadP folded into cmpq. The predicate restricts this to loads whose
        // barrier_data() is zero.
16316   match(Set cr (CmpP op1 (LoadP op2)));
16317   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16318 
16319   ins_cost(500); // XXX
16320   format %{ "cmpq    $op1, $op2\t# ptr" %}
16321   ins_encode %{
16322     __ cmpq($op1$$Register, $op2$$Address);
16323   %}
16324   ins_pipe(ialu_cr_reg_mem);
16325 %}
16326 
16327 // XXX this is generalized by compP_rReg_mem???
16328 // Compare raw pointer (used in out-of-heap check).
16329 // Only works because non-oop pointers must be raw pointers
16330 // and raw pointers have no anti-dependencies.
16331 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16332 %{
        // Same match rule and encoding as compP_rReg_mem, but without an explicit
        // ins_cost and with a narrower predicate: in(2) of the LoadP must have a
        // raw-pointer bottom type, and the load must carry no barrier data.
        // NOTE(review): in(2) of the load is assumed to be its address input.
16333   predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
16334             n->in(2)->as_Load()->barrier_data() == 0);
16335   match(Set cr (CmpP op1 (LoadP op2)));
16336 
16337   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16338   ins_encode %{
16339     __ cmpq($op1$$Register, $op2$$Address);
16340   %}
16341   ins_pipe(ialu_cr_reg_mem);
16342 %}
16343 
16344 // This will generate a signed flags result. This should be OK since
16345 // any compare to a zero should be eq/neq.
16346 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16347 %{
        // Pointer null check: CmpP of a register against the null constant,
        // emitted as testq of the register with itself. Note the signed flags
        // class rFlagsReg, unlike the other CmpP rules.
16348   match(Set cr (CmpP src zero));
16349 
16350   format %{ "testq   $src, $src\t# ptr" %}
16351   ins_encode %{
16352     __ testq($src$$Register, $src$$Register);
16353   %}
16354   ins_pipe(ialu_cr_reg_imm);
16355 %}
16356 
16357 // This will generate a signed flags result. This should be OK since
16358 // any compare to a zero should be eq/neq.
16359 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16360 %{
        // Null check of a pointer in memory without loading it into a register.
        // Selected when compressed oops are off, or on with a non-null base, and
        // the load carries no barrier data; testP_mem_reg0 covers the remaining
        // compressed-oops case.
        // NOTE(review): the encoding passes the 32-bit immediate 0xFFFFFFFF while
        // the format string prints a 64-bit all-ones mask; presumably testq
        // sign-extends its immediate -- confirm against the assembler.
16361   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16362             n->in(1)->as_Load()->barrier_data() == 0);
16363   match(Set cr (CmpP (LoadP op) zero));
16364 
16365   ins_cost(500); // XXX
16366   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16367   ins_encode %{
16368     __ testq($op$$Address, 0xFFFFFFFF);
16369   %}
16370   ins_pipe(ialu_cr_reg_imm);
16371 %}
16372 
16373 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16374 %{
        // Null check of a pointer in memory for the case where compressed oops
        // are enabled with a null base. As the format string notes, r12 (the
        // heap base register) is zero in that mode, so comparing r12 with the
        // memory operand avoids encoding an immediate.
16375   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16376             n->in(1)->as_Load()->barrier_data() == 0);
16377   match(Set cr (CmpP (LoadP mem) zero));
16378 
16379   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16380   ins_encode %{
16381     __ cmpq(r12, $mem$$Address);
16382   %}
16383   ins_pipe(ialu_cr_reg_mem);
16384 %}
16385 
16386 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16387 %{
        // Compare of two compressed (narrow) pointers held in registers, using
        // the 32-bit cmpl.
16388   match(Set cr (CmpN op1 op2));
16389 
16390   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16391   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16392   ins_pipe(ialu_cr_reg_reg);
16393 %}
16394 
16395 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16396 %{
        // Compare of a narrow pointer register against a narrow pointer in
        // memory, with the LoadN folded into cmpl. Only applies when the load
        // has no barrier data.
16397   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16398   match(Set cr (CmpN src (LoadN mem)));
16399 
16400   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16401   ins_encode %{
16402     __ cmpl($src$$Register, $mem$$Address);
16403   %}
16404   ins_pipe(ialu_cr_reg_mem);
16405 %}
16406 
16407 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // Compare of a narrow pointer register against a narrow oop constant.
        // The constant is handed to cmp_narrow_oop as a jobject rather than
        // being encoded here as a plain integer immediate.
16408   match(Set cr (CmpN op1 op2));
16409 
16410   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16411   ins_encode %{
16412     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16413   %}
16414   ins_pipe(ialu_cr_reg_imm);
16415 %}
16416 
16417 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16418 %{
        // Compare of a narrow oop constant against a narrow pointer in memory.
        // In the match rule the constant is the first CmpN input and the LoadN
        // the second, while the emitted compare takes the memory operand first.
        // Only applies when the load has no barrier data.
16419   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16420   match(Set cr (CmpN src (LoadN mem)));
16421 
16422   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16423   ins_encode %{
16424     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16425   %}
16426   ins_pipe(ialu_cr_reg_mem);
16427 %}
16428 
16429 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
        // Compare of a narrow register value against a narrow klass constant;
        // the constant is passed to cmp_narrow_klass as a Klass pointer.
16430   match(Set cr (CmpN op1 op2));
16431 
16432   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16433   ins_encode %{
16434     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16435   %}
16436   ins_pipe(ialu_cr_reg_imm);
16437 %}
16438 
16439 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16440 %{
        // Compare of a narrow klass constant against a narrow klass loaded from
        // memory (LoadNKlass folded into the compare). Disabled when
        // UseCompactObjectHeaders is set.
16441   predicate(!UseCompactObjectHeaders);
16442   match(Set cr (CmpN src (LoadNKlass mem)));
16443 
16444   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16445   ins_encode %{
16446     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16447   %}
16448   ins_pipe(ialu_cr_reg_mem);
16449 %}
16450 
16451 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Null check of a narrow pointer in a register, emitted as testl of the
        // register with itself.
16452   match(Set cr (CmpN src zero));
16453 
16454   format %{ "testl   $src, $src\t# compressed ptr" %}
16455   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16456   ins_pipe(ialu_cr_reg_imm);
16457 %}
16458 
16459 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16460 %{
        // Null check of a narrow pointer in memory when the compressed-oops base
        // is non-null and the load carries no barrier data; testN_mem_reg0 covers
        // the null-base case.
        // The check must be a testl against an all-ones mask, so that ZF is set
        // exactly when the loaded value is zero. A cmpl against -1 would set ZF
        // when the value equals 0xFFFFFFFF, which is neither what the CmpN-with-
        // zero match rule means nor what the format string prints.
16461   predicate(CompressedOops::base() != nullptr &&
16462             n->in(1)->as_Load()->barrier_data() == 0);
16463   match(Set cr (CmpN (LoadN mem) zero));
16464 
16465   ins_cost(500); // XXX
16466   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16467   ins_encode %{
16468     __ testl($mem$$Address, (int)0xFFFFFFFF);
16469   %}
16470   ins_pipe(ialu_cr_reg_mem);
16471 %}
16472 
16473 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16474 %{
        // Null check of a narrow pointer in memory when the compressed-oops base
        // is null. As the format string notes, r12 is zero in that mode, so the
        // check is a 32-bit compare of r12 with the memory operand.
16475   predicate(CompressedOops::base() == nullptr &&
16476             n->in(1)->as_Load()->barrier_data() == 0);
16477   match(Set cr (CmpN (LoadN mem) zero));
16478 
16479   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16480   ins_encode %{
16481     __ cmpl(r12, $mem$$Address);
16482   %}
16483   ins_pipe(ialu_cr_reg_mem);
16484 %}
16485 
16486 // Yanked all unsigned pointer compare operations.
16487 // Pointer compares are done with CmpP which is already unsigned.
16488 
16489 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16490 %{
        // Signed 64-bit register-register compare (CmpL), emitted as cmpq.
16491   match(Set cr (CmpL op1 op2));
16492 
16493   format %{ "cmpq    $op1, $op2" %}
16494   ins_encode %{
16495     __ cmpq($op1$$Register, $op2$$Register);
16496   %}
16497   ins_pipe(ialu_cr_reg_reg);
16498 %}
16499 
16500 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16501 %{
        // Signed 64-bit compare of a register against a long constant of operand
        // class immL32.
16502   match(Set cr (CmpL op1 op2));
16503 
16504   format %{ "cmpq    $op1, $op2" %}
16505   ins_encode %{
16506     __ cmpq($op1$$Register, $op2$$constant);
16507   %}
16508   ins_pipe(ialu_cr_reg_imm);
16509 %}
16510 
16511 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16512 %{
        // Signed 64-bit compare of a register against a long in memory, with the
        // LoadL folded into cmpq. Unlike the int and pointer memory variants,
        // this rule sets no explicit ins_cost.
16513   match(Set cr (CmpL op1 (LoadL op2)));
16514 
16515   format %{ "cmpq    $op1, $op2" %}
16516   ins_encode %{
16517     __ cmpq($op1$$Register, $op2$$Address);
16518   %}
16519   ins_pipe(ialu_cr_reg_mem);
16520 %}
16521 
16522 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16523 %{
        // Compare of a long register against zero, emitted as testq of the
        // register with itself.
16524   match(Set cr (CmpL src zero));
16525 
16526   format %{ "testq   $src, $src" %}
16527   ins_encode %{
16528     __ testq($src$$Register, $src$$Register);
16529   %}
16530   ins_pipe(ialu_cr_reg_imm);
16531 %}
16532 
16533 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16534 %{
        // Matches (src AND con) compared with zero for longs, where con is of
        // operand class immL32; emitted as one testq with an immediate.
16535   match(Set cr (CmpL (AndL src con) zero));
16536 
16537   format %{ "testq   $src, $con\t# long" %}
16538   ins_encode %{
16539     __ testq($src$$Register, $con$$constant);
16540   %}
16541   ins_pipe(ialu_cr_reg_imm);
16542 %}
16543 
16544 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16545 %{
        // Matches (src1 AND src2) compared with zero for longs and emits a single
        // testq of the two registers.
16546   match(Set cr (CmpL (AndL src1 src2) zero));
16547 
16548   format %{ "testq   $src1, $src2\t# long" %}
16549   ins_encode %{
16550     __ testq($src1$$Register, $src2$$Register);
16551   %}
16552   ins_pipe(ialu_cr_reg_imm);
16553 %}
16554 
16555 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16556 %{
        // Matches (src AND long loaded from mem) compared with zero; the LoadL
        // and AndL are folded into a testq with a memory operand.
16557   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16558 
16559   format %{ "testq   $src, $mem" %}
16560   ins_encode %{
16561     __ testq($src$$Register, $mem$$Address);
16562   %}
16563   ins_pipe(ialu_cr_reg_mem);
16564 %}
16565 
16566 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16567 %{
        // Variant of testL_reg_mem whose register input is a pointer viewed as a
        // long through CastP2X. The pointer register is used directly in the
        // testq, so the cast produces no instruction.
16568   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16569 
16570   format %{ "testq   $src, $mem" %}
16571   ins_encode %{
16572     __ testq($src$$Register, $mem$$Address);
16573   %}
16574   ins_pipe(ialu_cr_reg_mem);
16575 %}
16576 
16577 // Manifest a CmpU result in an integer register.  Very painful.
16578 // This is the test to avoid.
16579 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16580 %{
        // Three-way unsigned int compare materialized in dst: -1 when src1 is
        // below src2, 0 when they are equal, 1 otherwise. dst is preloaded with
        // -1 after the compare (the branch still sees the compare flags), and
        // setcc overwrites it with 0 or 1 only on the not-below path.
        // The flags register is clobbered (KILL), not produced as a result.
16581   match(Set dst (CmpU3 src1 src2));
16582   effect(KILL flags);
16583 
16584   ins_cost(275); // XXX
16585   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
16586             "movl    $dst, -1\n\t"
16587             "jb,u    done\n\t"
16588             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n"
16589     "done:" %}
16590   ins_encode %{
16591     Label done;
16592     __ cmpl($src1$$Register, $src2$$Register);
16593     __ movl($dst$$Register, -1);
16594     __ jccb(Assembler::below, done);
16595     __ setcc(Assembler::notZero, $dst$$Register);
16596     __ bind(done);
16597   %}
16598   ins_pipe(pipe_slow);
16599 %}
16600 
16601 // Manifest a CmpL result in an integer register.  Very painful.
16602 // This is the test to avoid.
16603 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16604 %{
        // Three-way signed long compare materialized in the int register dst:
        // -1 when src1 is less than src2, 0 when they are equal, 1 otherwise.
        // dst is preloaded with -1 after the compare (the branch still sees the
        // compare flags), and setcc overwrites it with 0 or 1 only on the
        // not-less path. The flags register is clobbered (KILL).
16605   match(Set dst (CmpL3 src1 src2));
16606   effect(KILL flags);
16607 
16608   ins_cost(275); // XXX
16609   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16610             "movl    $dst, -1\n\t"
16611             "jl,s    done\n\t"
16612             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n"
16613     "done:" %}
16614   ins_encode %{
16615     Label done;
16616     __ cmpq($src1$$Register, $src2$$Register);
16617     __ movl($dst$$Register, -1);
16618     __ jccb(Assembler::less, done);
16619     __ setcc(Assembler::notZero, $dst$$Register);
16620     __ bind(done);
16621   %}
16622   ins_pipe(pipe_slow);
16623 %}
16624 
16625 // Manifest a CmpUL result in an integer register.  Very painful.
16626 // This is the test to avoid.
16627 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16628 %{
        // Three-way unsigned long compare materialized in the int register dst:
        // -1 when src1 is below src2, 0 when they are equal, 1 otherwise.
        // dst is preloaded with -1 after the compare (the branch still sees the
        // compare flags), and setcc overwrites it with 0 or 1 only on the
        // not-below path. The flags register is clobbered (KILL).
16629   match(Set dst (CmpUL3 src1 src2));
16630   effect(KILL flags);
16631 
16632   ins_cost(275); // XXX
16633   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16634             "movl    $dst, -1\n\t"
16635             "jb,u    done\n\t"
16636             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n"
16637     "done:" %}
16638   ins_encode %{
16639     Label done;
16640     __ cmpq($src1$$Register, $src2$$Register);
16641     __ movl($dst$$Register, -1);
16642     __ jccb(Assembler::below, done);
16643     __ setcc(Assembler::notZero, $dst$$Register);
16644     __ bind(done);
16645   %}
16646   ins_pipe(pipe_slow);
16647 %}
16648 
16649 // Unsigned long compare Instructions; really, same as signed long except they
16650 // produce an rFlagsRegU instead of rFlagsReg.
16651 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16652 %{
        // Unsigned 64-bit register-register compare (CmpUL). Same cmpq encoding
        // as the signed rule; only the flags operand type differs.
16653   match(Set cr (CmpUL op1 op2));
16654 
16655   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16656   ins_encode %{
16657     __ cmpq($op1$$Register, $op2$$Register);
16658   %}
16659   ins_pipe(ialu_cr_reg_reg);
16660 %}
16661 
16662 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16663 %{
        // Unsigned 64-bit compare of a register against a long constant of
        // operand class immL32.
16664   match(Set cr (CmpUL op1 op2));
16665 
16666   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16667   ins_encode %{
16668     __ cmpq($op1$$Register, $op2$$constant);
16669   %}
16670   ins_pipe(ialu_cr_reg_imm);
16671 %}
16672 
16673 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16674 %{
        // Unsigned 64-bit compare of a register against a long in memory, with
        // the LoadL folded into the cmpq memory operand.
16675   match(Set cr (CmpUL op1 (LoadL op2)));
16676 
16677   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16678   ins_encode %{
16679     __ cmpq($op1$$Register, $op2$$Address);
16680   %}
16681   ins_pipe(ialu_cr_reg_mem);
16682 %}
16683 
16684 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16685 %{
        // Unsigned compare of a long register against zero, emitted as testq of
        // the register with itself.
16686   match(Set cr (CmpUL src zero));
16687 
16688   format %{ "testq   $src, $src\t# unsigned" %}
16689   ins_encode %{
16690     __ testq($src$$Register, $src$$Register);
16691   %}
16692   ins_pipe(ialu_cr_reg_imm);
16693 %}
16694 
16695 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16696 %{
        // Compare of a signed byte in memory against an 8-bit immediate. The
        // LoadB is folded into a byte-sized cmpb, so no widening load is emitted.
16697   match(Set cr (CmpI (LoadB mem) imm));
16698 
16699   ins_cost(125);
16700   format %{ "cmpb    $mem, $imm" %}
16701   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16702   ins_pipe(ialu_cr_reg_mem);
16703 %}
16704 
16705 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16706 %{
        // Matches (unsigned byte in memory AND imm) compared with zero, where imm
        // is of operand class immU7; emitted as a byte-sized testb on memory.
16707   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16708 
16709   ins_cost(125);
16710   format %{ "testb   $mem, $imm\t# ubyte" %}
16711   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16712   ins_pipe(ialu_cr_reg_mem);
16713 %}
16714 
16715 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16716 %{
        // Matches (signed byte in memory AND imm) compared with zero, where imm
        // is of operand class immI8; emitted as a byte-sized testb on memory.
16717   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16718 
16719   ins_cost(125);
16720   format %{ "testb   $mem, $imm\t# byte" %}
16721   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16722   ins_pipe(ialu_cr_reg_mem);
16723 %}
16724 
16725 //----------Max and Min--------------------------------------------------------
16726 // Min Instructions
16727 
16728 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16729 %{
        // Helper with no match rule: it is instantiated from the expand block of
        // minI_rReg. Moves src into dst when the preceding compare found
        // dst greater than src, which leaves the minimum in dst.
        // Non-APX form (dst is both input and output).
16730   predicate(!UseAPX);
16731   effect(USE_DEF dst, USE src, USE cr);
16732 
16733   format %{ "cmovlgt $dst, $src\t# min" %}
16734   ins_encode %{
16735     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16736   %}
16737   ins_pipe(pipe_cmov_reg);
16738 %}
16739 
16740 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16741 %{
        // APX counterpart of cmovI_reg_g with a separate destination register
        // (dst is DEF only). It has no match rule and is instantiated from the
        // expand block of minI_rReg_ndd.
        // NOTE(review): assumes ecmovl yields src2 when the condition holds and
        // src1 otherwise -- confirm against the assembler.
16742   predicate(UseAPX);
16743   effect(DEF dst, USE src1, USE src2, USE cr);
16744 
16745   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16746   ins_encode %{
16747     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16748   %}
16749   ins_pipe(pipe_cmov_reg);
16750 %}
16751 
16752 instruct minI_rReg(rRegI dst, rRegI src)
16753 %{
        // Integer minimum for non-APX targets, computed in place
        // (dst = min(dst, src)). Expands into a signed compare of dst and src
        // followed by a conditional move on greater, with a fresh flags register
        // linking the two.
16754   predicate(!UseAPX);
16755   match(Set dst (MinI dst src));
16756 
16757   ins_cost(200);
16758   expand %{
16759     rFlagsReg cr;
16760     compI_rReg(cr, dst, src);
16761     cmovI_reg_g(dst, src, cr);
16762   %}
16763 %}
16764 
16765 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16766 %{
        // Integer minimum for APX targets with a destination distinct from both
        // sources. Expands into a signed compare of src1 and src2 followed by the
        // three-operand conditional move on greater.
        // NOTE(review): Flag_ndd_demotable_opr1 presumably marks the instruction
        // as convertible to the two-operand form when dst equals the first
        // source -- confirm where the flag is consumed.
16767   predicate(UseAPX);
16768   match(Set dst (MinI src1 src2));
16769   effect(DEF dst, USE src1, USE src2);
16770   flag(PD::Flag_ndd_demotable_opr1);
16771 
16772   ins_cost(200);
16773   expand %{
16774     rFlagsReg cr;
16775     compI_rReg(cr, src1, src2);
16776     cmovI_reg_g_ndd(dst, src1, src2, cr);
16777   %}
16778 %}
16779 
16780 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16781 %{
        // Helper with no match rule: it is instantiated from the expand block of
        // maxI_rReg. Moves src into dst when the preceding compare found
        // dst less than src, which leaves the maximum in dst.
        // Non-APX form (dst is both input and output).
16782   predicate(!UseAPX);
16783   effect(USE_DEF dst, USE src, USE cr);
16784 
16785   format %{ "cmovllt $dst, $src\t# max" %}
16786   ins_encode %{
16787     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16788   %}
16789   ins_pipe(pipe_cmov_reg);
16790 %}
16791 
16792 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16793 %{
        // APX counterpart of cmovI_reg_l with a separate destination register
        // (dst is DEF only). It has no match rule and is instantiated from the
        // expand block of maxI_rReg_ndd.
        // NOTE(review): assumes ecmovl yields src2 when the condition holds and
        // src1 otherwise -- confirm against the assembler.
16794   predicate(UseAPX);
16795   effect(DEF dst, USE src1, USE src2, USE cr);
16796 
16797   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16798   ins_encode %{
16799     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16800   %}
16801   ins_pipe(pipe_cmov_reg);
16802 %}
16803 
16804 instruct maxI_rReg(rRegI dst, rRegI src)
16805 %{
        // Integer maximum for non-APX targets, computed in place
        // (dst = max(dst, src)). Expands into a signed compare of dst and src
        // followed by a conditional move on less, with a fresh flags register
        // linking the two.
16806   predicate(!UseAPX);
16807   match(Set dst (MaxI dst src));
16808 
16809   ins_cost(200);
16810   expand %{
16811     rFlagsReg cr;
16812     compI_rReg(cr, dst, src);
16813     cmovI_reg_l(dst, src, cr);
16814   %}
16815 %}
16816 
16817 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16818 %{
        // Integer maximum for APX targets with a destination distinct from both
        // sources. Expands into a signed compare of src1 and src2 followed by the
        // three-operand conditional move on less.
        // NOTE(review): Flag_ndd_demotable_opr1 presumably marks the instruction
        // as convertible to the two-operand form when dst equals the first
        // source -- confirm where the flag is consumed.
16819   predicate(UseAPX);
16820   match(Set dst (MaxI src1 src2));
16821   effect(DEF dst, USE src1, USE src2);
16822   flag(PD::Flag_ndd_demotable_opr1);
16823 
16824   ins_cost(200);
16825   expand %{
16826     rFlagsReg cr;
16827     compI_rReg(cr, src1, src2);
16828     cmovI_reg_l_ndd(dst, src1, src2, cr);
16829   %}
16830 %}
16831 
16832 // ============================================================================
16833 // Branch Instructions
16834 
16835 // Jump Direct - Label defines a relative address from JMP+1
16836 instruct jmpDir(label labl)
16837 %{
        // Unconditional branch for Goto in its long form. size(5) declares the
        // fixed encoded length, which is why jmp is called with its second
        // argument false to force the long encoding. jmpDir_short is the
        // short-offset replacement.
16838   match(Goto);
16839   effect(USE labl);
16840 
16841   ins_cost(300);
16842   format %{ "jmp     $labl" %}
16843   size(5);
16844   ins_encode %{
16845     Label* L = $labl$$label;
16846     __ jmp(*L, false); // Always long jump
16847   %}
16848   ins_pipe(pipe_jmp);
16849 %}
16850 
16851 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16852 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16853 %{
        // Conditional branch for If on signed flags, long form. The condition
        // code comes from the cmpOp operand; size(6) declares the fixed length of
        // the long jcc encoding. jmpCon_short is the short-offset replacement.
16854   match(If cop cr);
16855   effect(USE labl);
16856 
16857   ins_cost(300);
16858   format %{ "j$cop     $labl" %}
16859   size(6);
16860   ins_encode %{
16861     Label* L = $labl$$label;
16862     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16863   %}
16864   ins_pipe(pipe_jcc);
16865 %}
16866 
16867 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16868 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16869 %{
        // Conditional branch closing a counted loop (CountedLoopEnd), long form.
        // The encoding is identical to jmpCon; only the matched ideal node
        // differs.
16870   match(CountedLoopEnd cop cr);
16871   effect(USE labl);
16872 
16873   ins_cost(300);
16874   format %{ "j$cop     $labl\t# loop end" %}
16875   size(6);
16876   ins_encode %{
16877     Label* L = $labl$$label;
16878     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16879   %}
16880   ins_pipe(pipe_jcc);
16881 %}
16882 
16883 // Jump Direct Conditional - using unsigned comparison
16884 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Conditional branch for If on unsigned flags (rFlagsRegU with a cmpOpU
        // condition), long form with a fixed size of 6 bytes.
16885   match(If cop cmp);
16886   effect(USE labl);
16887 
16888   ins_cost(300);
16889   format %{ "j$cop,u   $labl" %}
16890   size(6);
16891   ins_encode %{
16892     Label* L = $labl$$label;
16893     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16894   %}
16895   ins_pipe(pipe_jcc);
16896 %}
16897 
16898 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Conditional branch for If on the rFlagsRegUCF flags class with a
        // cmpOpUCF condition that needs only one jcc. Long form; note the lower
        // ins_cost (200) compared with jmpConU (300).
16899   match(If cop cmp);
16900   effect(USE labl);
16901 
16902   ins_cost(200);
16903   format %{ "j$cop,u   $labl" %}
16904   size(6);
16905   ins_encode %{
16906     Label* L = $labl$$label;
16907     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16908   %}
16909   ins_pipe(pipe_jcc);
16910 %}
16911 
16912 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Conditional branch for the cmpOpUCF2 conditions, which need the parity
        // flag examined in addition to the equality flag and so emit two jumps:
        //   notEqual: branch to the label if parity is set OR not-equal holds.
        //   equal:    skip over the branch if parity is set, otherwise branch to
        //             the label on equal.
        // Any other condition code is a matcher error (ShouldNotReachHere).
        // No size() is declared for this long form.
        // NOTE(review): parity set presumably signals an unordered
        // floating-point compare -- confirm against the compare rules that
        // produce rFlagsRegUCF.
16913   match(If cop cmp);
16914   effect(USE labl);
16915 
16916   ins_cost(200);
16917   format %{ $$template
16918     if ($cop$$cmpcode == Assembler::notEqual) {
16919       $$emit$$"jp,u    $labl\n\t"
16920       $$emit$$"j$cop,u   $labl"
16921     } else {
16922       $$emit$$"jp,u    done\n\t"
16923       $$emit$$"j$cop,u   $labl\n\t"
16924       $$emit$$"done:"
16925     }
16926   %}
16927   ins_encode %{
16928     Label* l = $labl$$label;
16929     if ($cop$$cmpcode == Assembler::notEqual) {
16930       __ jcc(Assembler::parity, *l, false);
16931       __ jcc(Assembler::notEqual, *l, false);
16932     } else if ($cop$$cmpcode == Assembler::equal) {
16933       Label done;
16934       __ jccb(Assembler::parity, done);
16935       __ jcc(Assembler::equal, *l, false);
16936       __ bind(done);
16937     } else {
16938        ShouldNotReachHere();
16939     }
16940   %}
16941   ins_pipe(pipe_jcc);
16942 %}
16943 
16944 // Jump Direct Conditional - using signed and unsigned comparison
16945 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
        // Conditional branch for If on the rFlagsRegUCFE flags class with a
        // cmpOpUCFE condition; a single long jcc with a fixed size of 6 bytes.
16946   match(If cop cmp);
16947   effect(USE labl);
16948 
16949   ins_cost(200);
16950   format %{ "j$cop,su   $labl" %}
16951   size(6);
16952   ins_encode %{
16953     Label* L = $labl$$label;
16954     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16955   %}
16956   ins_pipe(pipe_jcc);
16957 %}
16958 
16959 // ============================================================================
16960 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
16961 // superklass array for an instance of the superklass.  Set a hidden
16962 // internal cache on a hit (cache is checked with exposed code in
16963 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16964 // encoding ALSO sets flags.
16965 
16966 instruct partialSubtypeCheck(rdi_RegP result,
16967                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16968                              rFlagsReg cr)
16969 %{
        // Linear-scan slow path of the subtype check, used only when
        // UseSecondarySupersTable is off. Operands are bound to fixed register
        // classes (result rdi, sub rsi, super rax, counter rcx), matching the
        // scan sequence shown in the format string. rcx and the flags are
        // clobbered. On a hit the code falls through and zeroes the result; on a
        // miss the helper branches to the miss label with the result non-zero.
16970   match(Set result (PartialSubtypeCheck sub super));
16971   predicate(!UseSecondarySupersTable);
16972   effect(KILL rcx, KILL cr);
16973 
16974   ins_cost(1100);  // slightly larger than the next version
16975   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16976             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16977             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16978             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16979             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16980             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16981             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16982     "miss:\t" %}
16983 
16984   ins_encode %{
16985     Label miss;
16986     // NB: Callers may assume that, when $result is a valid register,
16987     // check_klass_subtype_slow_path_linear sets it to a nonzero
16988     // value.
16989     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16990                                             $rcx$$Register, $result$$Register,
16991                                             nullptr, &miss,
16992                                             /*set_cond_codes:*/ true);
16993     __ xorptr($result$$Register, $result$$Register);
16994     __ bind(miss);
16995   %}
16996 
16997   ins_pipe(pipe_slow);
16998 %}
16999 
17000 // ============================================================================
17001 // Two versions of hashtable-based partialSubtypeCheck, both used when
17002 // we need to search for a super class in the secondary supers array.
17003 // The first is used when we don't know _a priori_ the class being
17004 // searched for. The second, far more common, is used when we do know:
17005 // this is used for instanceof, checkcast, and any case where C2 can
17006 // determine it by constant propagation.
17007 
17008 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17009                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17010                                        rFlagsReg cr)
17011 %{
        // Hashtable-based subtype check for a superclass that is not a
        // compile-time constant; used when UseSecondarySupersTable is on. All
        // operands are bound to fixed registers; the four temps are scratch and
        // the flags are clobbered. The whole lookup is delegated to
        // lookup_secondary_supers_table_var.
17012   match(Set result (PartialSubtypeCheck sub super));
17013   predicate(UseSecondarySupersTable);
17014   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17015 
17016   ins_cost(1000);
17017   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17018 
17019   ins_encode %{
17020     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17021 					 $temp3$$Register, $temp4$$Register, $result$$Register);
17022   %}
17023 
17024   ins_pipe(pipe_slow);
17025 %}
17026 
17027 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17028                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17029                                        rFlagsReg cr)
17030 %{
        // Hashtable-based subtype check for a constant superclass, which is
        // matched both as a register (super_reg) and as an immediate
        // (super_con). The hash slot of the constant klass is computed when the
        // code is emitted. With InlineSecondarySupersTest the lookup is emitted
        // inline; otherwise the code calls the per-slot stub routine.
        // Its ins_cost (700) is lower than that of partialSubtypeCheckVarSuper
        // (1000).
17031   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17032   predicate(UseSecondarySupersTable);
17033   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17034 
17035   ins_cost(700);  // smaller than the next version
17036   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17037 
17038   ins_encode %{
17039     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17040     if (InlineSecondarySupersTest) {
17041       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17042                                        $temp3$$Register, $temp4$$Register, $result$$Register,
17043                                        super_klass_slot);
17044     } else {
17045       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17046     }
17047   %}
17048 
17049   ins_pipe(pipe_slow);
17050 %}
17051 
17052 // ============================================================================
17053 // Branch Instructions -- short offset versions
17054 //
17055 // These instructions are used to replace jumps of a long offset (the default
17056 // match) with jumps of a shorter offset.  These instructions are all tagged
17057 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17058 // match rules in general matching.  Instead, the ADLC generates a conversion
17059 // method in the MachNode which can be used to do in-place replacement of the
17060 // long variant with the shorter variant.  The compiler will determine if a
17061 // branch can be taken by the is_short_branch_offset() predicate in the machine
17062 // specific code section of the file.
17063 
17064 // Jump Direct - Label defines a relative address from JMP+1
17065 instruct jmpDir_short(label labl) %{
        // Short-offset replacement for jmpDir: a 2-byte jmpb. It is tagged with
        // ins_short_branch, so it is substituted for the long variant rather
        // than selected by normal matching.
17066   match(Goto);
17067   effect(USE labl);
17068 
17069   ins_cost(300);
17070   format %{ "jmp,s   $labl" %}
17071   size(2);
17072   ins_encode %{
17073     Label* L = $labl$$label;
17074     __ jmpb(*L);
17075   %}
17076   ins_pipe(pipe_jmp);
17077   ins_short_branch(1);
17078 %}
17079 
17080 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17081 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset replacement for jmpCon: a 2-byte jccb on signed flags,
        // tagged with ins_short_branch.
17082   match(If cop cr);
17083   effect(USE labl);
17084 
17085   ins_cost(300);
17086   format %{ "j$cop,s   $labl" %}
17087   size(2);
17088   ins_encode %{
17089     Label* L = $labl$$label;
17090     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17091   %}
17092   ins_pipe(pipe_jcc);
17093   ins_short_branch(1);
17094 %}
17095 
17096 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17097 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset replacement for jmpLoopEnd: a 2-byte jccb closing a
        // counted loop, tagged with ins_short_branch.
17098   match(CountedLoopEnd cop cr);
17099   effect(USE labl);
17100 
17101   ins_cost(300);
17102   format %{ "j$cop,s   $labl\t# loop end" %}
17103   size(2);
17104   ins_encode %{
17105     Label* L = $labl$$label;
17106     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17107   %}
17108   ins_pipe(pipe_jcc);
17109   ins_short_branch(1);
17110 %}
17111 
17112 // Jump Direct Conditional - using unsigned comparison
17113 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Short-offset replacement for jmpConU: a 2-byte jccb on unsigned flags,
        // tagged with ins_short_branch.
17114   match(If cop cmp);
17115   effect(USE labl);
17116 
17117   ins_cost(300);
17118   format %{ "j$cop,us  $labl" %}
17119   size(2);
17120   ins_encode %{
17121     Label* L = $labl$$label;
17122     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17123   %}
17124   ins_pipe(pipe_jcc);
17125   ins_short_branch(1);
17126 %}
17127 
17128 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpConUCF: a 2-byte jccb, tagged with
        // ins_short_branch. Its ins_cost is 300, whereas the long variant
        // declares 200.
17129   match(If cop cmp);
17130   effect(USE labl);
17131 
17132   ins_cost(300);
17133   format %{ "j$cop,us  $labl" %}
17134   size(2);
17135   ins_encode %{
17136     Label* L = $labl$$label;
17137     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17138   %}
17139   ins_pipe(pipe_jcc);
17140   ins_short_branch(1);
17141 %}
17142 
17143 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpConUCF2. Same two-jump logic as the
        // long variant (parity is examined together with equal/notEqual), but
        // both jumps are 2-byte jccb instructions, which gives the declared
        // size of 4. Any condition other than equal or notEqual is a matcher
        // error (ShouldNotReachHere).
17144   match(If cop cmp);
17145   effect(USE labl);
17146 
17147   ins_cost(300);
17148   format %{ $$template
17149     if ($cop$$cmpcode == Assembler::notEqual) {
17150       $$emit$$"jp,u,s  $labl\n\t"
17151       $$emit$$"j$cop,u,s  $labl"
17152     } else {
17153       $$emit$$"jp,u,s  done\n\t"
17154       $$emit$$"j$cop,u,s  $labl\n\t"
17155       $$emit$$"done:"
17156     }
17157   %}
17158   size(4);
17159   ins_encode %{
17160     Label* l = $labl$$label;
17161     if ($cop$$cmpcode == Assembler::notEqual) {
17162       __ jccb(Assembler::parity, *l);
17163       __ jccb(Assembler::notEqual, *l);
17164     } else if ($cop$$cmpcode == Assembler::equal) {
17165       Label done;
17166       __ jccb(Assembler::parity, done);
17167       __ jccb(Assembler::equal, *l);
17168       __ bind(done);
17169     } else {
17170        ShouldNotReachHere();
17171     }
17172   %}
17173   ins_pipe(pipe_jcc);
17174   ins_short_branch(1);
17175 %}
17176 
// Jump Direct Conditional - using signed and unsigned comparison
// Short-branch rule for If on cmpOpUCFE / rFlagsRegUCFE flags: a single jccb,
// declared as two bytes by size(2).
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,sus  $labl" %}
  size(2);
  ins_encode %{
    // Translate the operand's condition code, then branch to the label.
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17192 
17193 // ============================================================================
17194 // inlined locking and unlocking
17195 
// Inlined monitor enter: matches FastLock and delivers its outcome in the
// flags register cr. The work is done by the fast_lock assembler routine,
// which is handed r15_thread as the thread register.
// box (rbx_RegP) is read and then clobbered (USE_KILL); rax_reg and tmp are
// scratch registers (TEMP).
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17206 
// Inlined monitor exit: matches FastUnlock and delivers its outcome in the
// flags register cr via the fast_unlock assembler routine (r15_thread is the
// thread register). The second FastUnlock input lives in rax_reg (rax_RegP)
// and is clobbered (USE_KILL); tmp is a scratch register (TEMP).
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);
  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17217 
17218 
17219 // ============================================================================
17220 // Safepoint Instructions
// Safepoint poll: a testl of rax against the memory addressed by the poll
// register. The instruction is tagged with a poll_type relocation, and the
// flags register is clobbered (KILL cr).
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl   rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    // Remember where the poll starts so debug builds can verify its shape.
    address poll_insn_start = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(poll_insn_start)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
17237 
// MaskAll from a long source into a mask register (kReg). The node's vector
// length is passed to vector_maskall_operation as the mask length.
instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
17247 
// MaskAll from an int source when the vector has more than 32 lanes.
// The int is first sign-extended into the long scratch register tmp, and the
// widened value is then handed to vector_maskall_operation.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    // Widen the 32-bit source to 64 bits before building the mask.
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
17260 
17261 // ============================================================================
17262 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// The encoding runs three steps in order: clear_avx, Java_Static_Call(meth),
// call_epilog. The instruction requests 4-byte alignment (ins_alignment).
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17277 
// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// Per the format string, rax is loaded with Universe::non_oop_word() ahead of
// the call. The encoding runs clear_avx, Java_Dynamic_Call(meth), call_epilog
// in that order, and the instruction requests 4-byte alignment.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17293 
// Call Runtime Instruction
// Matches CallRuntime. The encoding is clear_avx followed by
// Java_To_Runtime(meth); no call_epilog step and no alignment request.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17305 
// Call runtime without safepoint
// Matches CallLeaf. Same encoding as CallRuntimeDirect: clear_avx followed
// by Java_To_Runtime(meth).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17317 
// Call runtime without safepoint and with vector arguments
// Matches CallLeafVector. Unlike the other runtime-call rules in this group,
// the encoding is Java_To_Runtime(meth) alone, with no clear_avx step.
// NOTE(review): presumably clear_avx is omitted so that vector register
// arguments reach the callee intact - confirm.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17329 
// Call runtime without safepoint
// Matches CallLeafNoFP. The encoding is clear_avx followed by
// Java_To_Runtime(meth), identical to CallLeafDirect.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17341 
// Return Instruction
// Remove the return address & jump to it.
// NOTE(review): this comment used to state that a nop is always emitted after
// the ret to leave room for safepoint patching. The encoding below emits only
// ret(0) and nothing else; confirm whether padding is still needed or is
// provided elsewhere.
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    __ ret(0);
  %}
  ins_pipe(pipe_jmp);
%}
17356 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall which has reset rbp to the caller state.
// method_ptr is not referenced by the encoding; it is an input of the match
// rule bound to rbx_RegP, and the format string notes that rbx holds the
// method at the time of the jump.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  ins_cost(300);
  format %{ "jmp     $jump_target\t# rbx holds method" %}
  ins_encode %{
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17374 
// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// The return address is discarded by popping it into rdx, after which control
// transfers through jump_target. ex_oop (rax_RegP) is an input of the match
// rule only; the encoding does not touch it.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  ins_encode %{
    __ popq(as_Register(RDX_enc));
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17390 
// Forward exception.
// Matches ForwardException and jumps (does not call) to the address returned
// by StubRoutines::forward_exception_entry(). noreg is passed as the second
// argument of jump().
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp     forward_exception_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17402 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
// The rule only tells the register allocator that the CreateEx result lives
// in the rax_RegP operand: size(0) and an empty ins_encode produce no bytes.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
17416 
// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
// The jump target is OptoRuntime::rethrow_stub(); noreg is passed as the
// second argument of jump().
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp     rethrow_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17431 
17432 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
// Matches ThreadLocal with the result bound to the r15_RegP operand. No code
// is emitted (size(0), empty encoding); the format string records that the
// thread-local pointer is already in R15.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
17445 
// AddF, register-register, non-AVX (UseAVX == 0): dst = dst + src via addss.
// Two-operand form: dst is both the left input and the result.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));

  format %{ "addss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17457 
// AddF with the right operand loaded from memory, non-AVX (UseAVX == 0):
// dst = dst + LoadF(src), with the load folded into addss as a memory operand.
instruct addF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17469 
// AddF with a float constant, non-AVX (UseAVX == 0): dst = dst + con.
// The constant is read from the constant table as the addss memory operand.
instruct addF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17480 
// AddF, register-register, AVX (UseAVX > 0): dst = src1 + src2 via vaddss.
// Three-operand form: dst is a separate operand from both inputs.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17492 
// AddF with the right operand loaded from memory, AVX (UseAVX > 0):
// dst = src1 + LoadF(src2), with the load folded into vaddss.
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17504 
// AddF with a float constant, AVX (UseAVX > 0): dst = src + con via vaddss,
// with the constant read from the constant table.
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17516 
// AddD, register-register, non-AVX (UseAVX == 0): dst = dst + src via addsd.
// Two-operand form: dst is both the left input and the result.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));

  format %{ "addsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17528 
// AddD with the right operand loaded from memory, non-AVX (UseAVX == 0):
// dst = dst + LoadD(src), with the load folded into addsd as a memory operand.
instruct addD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17540 
// AddD with a double constant, non-AVX (UseAVX == 0): dst = dst + con.
// The constant is read from the constant table as the addsd memory operand.
instruct addD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17551 
// AddD, register-register, AVX (UseAVX > 0): dst = src1 + src2 via vaddsd.
// Three-operand form: dst is a separate operand from both inputs.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17563 
// AddD with the right operand loaded from memory, AVX (UseAVX > 0):
// dst = src1 + LoadD(src2), with the load folded into vaddsd.
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17575 
// AddD with a double constant, AVX (UseAVX > 0): dst = src + con via vaddsd,
// with the constant read from the constant table.
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17587 
// SubF, register-register, non-AVX (UseAVX == 0): dst = dst - src via subss.
// Two-operand form: dst is both the minuend and the result.
instruct subF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst src));

  format %{ "subss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17599 
// SubF with the subtrahend loaded from memory, non-AVX (UseAVX == 0):
// dst = dst - LoadF(src), with the load folded into subss as a memory operand.
instruct subF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17611 
// SubF with a float constant, non-AVX (UseAVX == 0): dst = dst - con.
// The constant is read from the constant table as the subss memory operand.
instruct subF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst con));
  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17622 
// SubF, register-register, AVX (UseAVX > 0): dst = src1 - src2 via vsubss.
// Three-operand form: dst is a separate operand from both inputs.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17634 
// SubF with the subtrahend loaded from memory, AVX (UseAVX > 0):
// dst = src1 - LoadF(src2), with the load folded into vsubss.
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17646 
// SubF with a float constant, AVX (UseAVX > 0): dst = src - con via vsubss,
// with the constant read from the constant table.
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17658 
// SubD, register-register, non-AVX (UseAVX == 0): dst = dst - src via subsd.
// Two-operand form: dst is both the minuend and the result.
instruct subD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst src));

  format %{ "subsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17670 
// SubD with the subtrahend loaded from memory, non-AVX (UseAVX == 0):
// dst = dst - LoadD(src), with the load folded into subsd as a memory operand.
instruct subD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17682 
// SubD with a double constant, non-AVX (UseAVX == 0): dst = dst - con.
// The constant is read from the constant table as the subsd memory operand.
instruct subD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst con));
  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17693 
// SubD, register-register, AVX (UseAVX > 0): dst = src1 - src2 via vsubsd.
// Three-operand form: dst is a separate operand from both inputs.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17705 
// SubD with the subtrahend loaded from memory, AVX (UseAVX > 0):
// dst = src1 - LoadD(src2), with the load folded into vsubsd.
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17717 
// SubD with a double constant, AVX (UseAVX > 0): dst = src - con via vsubsd,
// with the constant read from the constant table.
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17729 
// MulF, register-register, non-AVX (UseAVX == 0): dst = dst * src via mulss.
// Two-operand form: dst is both the left input and the result.
instruct mulF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst src));

  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17741 
// MulF with the right operand loaded from memory, non-AVX (UseAVX == 0):
// dst = dst * LoadF(src), with the load folded into mulss as a memory operand.
instruct mulF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17753 
// MulF with a float constant, non-AVX (UseAVX == 0): dst = dst * con.
// The constant is read from the constant table as the mulss memory operand.
instruct mulF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst con));
  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17764 
// MulF, register-register, AVX (UseAVX > 0): dst = src1 * src2 via vmulss.
// Three-operand form: dst is a separate operand from both inputs.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17776 
// MulF with the right operand loaded from memory, AVX (UseAVX > 0):
// dst = src1 * LoadF(src2), with the load folded into vmulss.
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17788 
// MulF with a float constant, AVX (UseAVX > 0): dst = src * con via vmulss,
// with the constant read from the constant table.
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17800 
// MulD, register-register, non-AVX (UseAVX == 0): dst = dst * src via mulsd.
// Two-operand form: dst is both the left input and the result.
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst src));

  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17812 
// MulD with the right operand loaded from memory, non-AVX (UseAVX == 0):
// dst = dst * LoadD(src), with the load folded into mulsd as a memory operand.
instruct mulD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17824 
// MulD with a double constant, non-AVX (UseAVX == 0): dst = dst * con.
// The constant is read from the constant table as the mulsd memory operand.
instruct mulD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst con));
  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17835 
// MulD, register-register, AVX (UseAVX > 0): dst = src1 * src2 via vmulsd.
// Three-operand form: dst is a separate operand from both inputs.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17847 
// MulD with the right operand loaded from memory, AVX (UseAVX > 0):
// dst = src1 * LoadD(src2), with the load folded into vmulsd.
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17859 
// MulD with a double constant, AVX (UseAVX > 0): dst = src * con via vmulsd,
// with the constant read from the constant table.
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17871 
// DivF, register-register, non-AVX (UseAVX == 0): dst = dst / src via divss.
// Two-operand form: dst is both the dividend and the result.
instruct divF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst src));

  format %{ "divss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17883 
// DivF with the divisor loaded from memory, non-AVX (UseAVX == 0):
// dst = dst / LoadF(src), with the load folded into divss as a memory operand.
instruct divF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17895 
// DivF with a float constant divisor, non-AVX (UseAVX == 0): dst = dst / con.
// The constant is read from the constant table as the divss memory operand.
instruct divF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst con));
  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17906 
// DivF, register-register, AVX (UseAVX > 0): dst = src1 / src2 via vdivss.
// Three-operand form: dst is a separate operand from both inputs.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17918 
// DivF with the divisor loaded from memory, AVX (UseAVX > 0):
// dst = src1 / LoadF(src2), with the load folded into vdivss.
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17930 
// DivF with a float constant divisor, AVX (UseAVX > 0): dst = src / con via
// vdivss, with the constant read from the constant table.
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17942 
// DivD, register-register, non-AVX (UseAVX == 0): dst = dst / src via divsd.
// Two-operand form: dst is both the dividend and the result.
instruct divD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst src));

  format %{ "divsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17954 
// DivD with the divisor loaded from memory, non-AVX (UseAVX == 0):
// dst = dst / LoadD(src), with the load folded into divsd as a memory operand.
instruct divD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17966 
// DivD with a double constant divisor, non-AVX (UseAVX == 0): dst = dst / con.
// The constant is read from the constant table as the divsd memory operand.
instruct divD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst con));
  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17977 
// DivD, register-register, AVX (UseAVX > 0): dst = src1 / src2 via vdivsd.
// Three-operand form: dst is a separate operand from both inputs.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17989 
// DivD with the divisor loaded from memory, AVX (UseAVX > 0):
// dst = src1 / LoadD(src2), with the load folded into vdivsd.
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18001 
// DivD with a double constant divisor, AVX (UseAVX > 0): dst = src / con via
// vdivsd, with the constant read from the constant table.
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18013 
// AbsF in place, non-AVX (UseAVX == 0): ANDs dst with the constant at
// float_signmask() (shown as 0x7fffffff in the format string), which clears
// the sign bit.
instruct absF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
18024 
// AbsF, AVX (UseAVX > 0): dst = src AND the constant at float_signmask()
// (shown as 0x7fffffff in the format string), encoded as a 128-bit vandps.
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    // The vector length encoding is fixed at 128 bits for this rule.
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
18037 
// AbsD in place, non-AVX (UseAVX == 0): ANDs dst with the constant at
// double_signmask() (shown as 0x7fffffffffffffff in the format string),
// which clears the sign bit.
instruct absD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
18049 
// AbsD, AVX (UseAVX > 0): dst = src AND the constant at double_signmask()
// (shown as 0x7fffffffffffffff in the format string), encoded as a 128-bit
// vandpd.
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    // The vector length encoding is fixed at 128 bits for this rule.
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
18063 
// NegF in place, non-AVX (UseAVX == 0): XORs dst with the constant at
// float_signflip() (shown as 0x80000000 in the format string), which flips
// the sign bit.
instruct negF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18074 
// NegF, AVX (UseAVX > 0): dst = -src, produced by the vnegatess assembler
// helper using the constant at float_signflip() (0x80000000 per the format
// string).
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18086 
// NegD in place, non-AVX (UseAVX == 0): XORs dst with the constant at
// double_signflip() (shown as 0x8000000000000000 in the format string),
// which flips the sign bit.
instruct negD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18098 
// NegD, AVX (UseAVX > 0): dst = -src, produced by the vnegatesd assembler
// helper using the constant at double_signflip() (0x8000000000000000 per the
// format string).
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18111 
// sqrtss instruction needs destination register to be pre initialized for best performance
// Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
// SqrtF in place: dst = sqrt(dst). The rule has no predicate, so it is not
// restricted by the UseAVX setting, and dst is passed to sqrtss as both
// source and destination.
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));
  format %{ "sqrtss  $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18122 
// sqrtsd instruction needs destination register to be pre initialized for best performance
// Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
// SqrtD in place: dst = sqrt(dst). The rule has no predicate, so it is not
// restricted by the UseAVX setting, and dst is passed to sqrtsd as both
// source and destination.
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd  $dst, $dst" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18133 
// ConvF2HF (float to half-float) with the result in a general-purpose int
// register. Delegates to the flt_to_flt16 assembler helper, which is given an
// XMM scratch register (tmp, declared TEMP).
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  effect(TEMP tmp);
  match(Set dst (ConvF2HF src));
  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_encode %{
    __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18144 
// Fused ConvF2HF + StoreC: converts src to half-float and stores it straight
// to memory. Only selected when UseAVX > 2 and AVX512VL is supported.
// The encoding builds a mask with just bit 0 set (0x1 moved through rtmp into
// ktmp) and issues a masked 128-bit evcvtps2ph to mem under that mask.
// NOTE(review): the 0x04 immediate is presumably the rounding-control field
// selecting the MXCSR rounding mode - confirm against the instruction manual.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  effect(TEMP ktmp, TEMP rtmp);
  match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    __ movl($rtmp$$Register, 0x1);
    __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
    __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
18157 
18158 instruct vconvF2HF(vec dst, vec src) %{
        // Vector float -> half-float cast (VectorCastF2HF), register to register.
18159   match(Set dst (VectorCastF2HF src));
18160   format %{ "vector_conv_F2HF $dst $src" %}
18161   ins_encode %{
          // The vector-length encoding is taken from the source operand,
          // not from the node's own result type.
18162     int vlen_enc = vector_length_encoding(this, $src);
18163     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18164   %}
18165   ins_pipe( pipe_slow );
18166 %}
18167 
18168 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
        // Fuses a StoreVector of a VectorCastF2HF result into a single
        // vcvtps2ph with a memory destination. Only selected when the store
        // writes at least 16 bytes.
18169   predicate(n->as_StoreVector()->memory_size() >= 16);
18170   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18171   format %{ "vcvtps2ph $mem,$src" %}
18172   ins_encode %{
          // Length encoding comes from the float source vector.
18173     int vlen_enc = vector_length_encoding(this, $src);
18174     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18175   %}
18176   ins_pipe( pipe_slow );
18177 %}
18178 
18179 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
        // Scalar half-float -> float conversion (ConvHF2F): the half-float
        // arrives in a general-purpose register and flt16_to_flt() produces
        // the float in an XMM register.
18180   match(Set dst (ConvHF2F src));
18181   format %{ "vcvtph2ps $dst,$src" %}
18182   ins_encode %{
18183     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18184   %}
18185   ins_pipe( pipe_slow );
18186 %}
18187 
18188 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
        // Vector half-float -> float cast with the LoadVector folded into the
        // vcvtph2ps memory operand.
18189   match(Set dst (VectorCastHF2F (LoadVector mem)));
18190   format %{ "vcvtph2ps $dst,$mem" %}
18191   ins_encode %{
          // Length encoding is that of the node's result vector.
18192     int vlen_enc = vector_length_encoding(this);
18193     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18194   %}
18195   ins_pipe( pipe_slow );
18196 %}
18197 
18198 instruct vconvHF2F(vec dst, vec src) %{
        // Vector half-float -> float cast (VectorCastHF2F), register to register.
18199   match(Set dst (VectorCastHF2F src));
18200   ins_cost(125);
18201   format %{ "vector_conv_HF2F $dst,$src" %}
18202   ins_encode %{
          // Length encoding is that of the node's result vector.
18203     int vlen_enc = vector_length_encoding(this);
18204     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18205   %}
18206   ins_pipe( pipe_slow );
18207 %}
18208 
18209 // ---------------------------------------- VectorReinterpret ------------------------------------
18210 instruct reinterpret_mask(kReg dst) %{
        // VectorReinterpret of an opmask whose lane count is unchanged.
        // Input and output share the same register (dst), so no code is emitted.
18211   predicate(n->bottom_type()->isa_pvectmask() &&
18212             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // same lane count: dst == src
18213   match(Set dst (VectorReinterpret dst));
18214   ins_cost(125);
18215   format %{ "vector_reinterpret $dst\t!" %}
18216   ins_encode %{
18217     // empty
18218   %}
18219   ins_pipe( pipe_slow );
18220 %}
18221 
18222 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
        // Reinterprets a T_SHORT-element opmask as a T_BYTE-element opmask
        // when the lane counts differ. The source mask is materialized into
        // the vector xtmp (evpmovm2w) and then converted back to a mask with
        // one bit per byte (evpmovb2m).
18223   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18224             n->bottom_type()->isa_pvectmask() &&
18225             n->in(1)->bottom_type()->isa_pvectmask() &&
18226             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
18227             n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // short mask -> byte mask
18228   match(Set dst (VectorReinterpret src));
18229   effect(TEMP xtmp);
18230   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18231   ins_encode %{
           // Both views must cover the same number of bytes; that byte size
           // selects the vector-length encoding.
18232      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18233      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18234      assert(src_sz == dst_sz , "src and dst size mismatch");
18235      int vlen_enc = vector_length_encoding(src_sz);
18236      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18237      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18238   %}
18239   ins_pipe( pipe_slow );
18240 %}
18241 
18242 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
        // Reinterprets a T_INT/T_FLOAT-element opmask as a T_BYTE-element
        // opmask when the lane counts differ. The source mask is materialized
        // into the vector xtmp (evpmovm2d) and then converted back to a mask
        // with one bit per byte (evpmovb2m).
18243   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18244             n->bottom_type()->isa_pvectmask() &&
18245             n->in(1)->bottom_type()->isa_pvectmask() &&
18246             (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
18247              n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
18248             n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // int/float mask -> byte mask
18249   match(Set dst (VectorReinterpret src));
18250   effect(TEMP xtmp);
18251   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18252   ins_encode %{
           // Both views must cover the same number of bytes; that byte size
           // selects the vector-length encoding.
18253      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18254      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18255      assert(src_sz == dst_sz , "src and dst size mismatch");
18256      int vlen_enc = vector_length_encoding(src_sz);
18257      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18258      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18259   %}
18260   ins_pipe( pipe_slow );
18261 %}
18262 
18263 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
        // Reinterprets a T_LONG/T_DOUBLE-element opmask as a T_BYTE-element
        // opmask when the lane counts differ. The source mask is materialized
        // into the vector xtmp (evpmovm2q) and then converted back to a mask
        // with one bit per byte (evpmovb2m).
18264   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18265             n->bottom_type()->isa_pvectmask() &&
18266             n->in(1)->bottom_type()->isa_pvectmask() &&
18267             (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
18268              n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
18269             n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // long/double mask -> byte mask
18270   match(Set dst (VectorReinterpret src));
18271   effect(TEMP xtmp);
18272   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18273   ins_encode %{
           // Both views must cover the same number of bytes; that byte size
           // selects the vector-length encoding.
18274      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18275      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18276      assert(src_sz == dst_sz , "src and dst size mismatch");
18277      int vlen_enc = vector_length_encoding(src_sz);
18278      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18279      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18280   %}
18281   ins_pipe( pipe_slow );
18282 %}
18283 
18284 instruct reinterpret(vec dst) %{
        // VectorReinterpret between non-mask vectors of equal byte size.
        // Input and output share the same register (dst), so no code is emitted.
18285   predicate(!n->bottom_type()->isa_pvectmask() &&
18286             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18287   match(Set dst (VectorReinterpret dst));
18288   ins_cost(125);
18289   format %{ "vector_reinterpret $dst\t!" %}
18290   ins_encode %{
18291     // empty
18292   %}
18293   ins_pipe( pipe_slow );
18294 %}
18295 
18296 instruct reinterpret_expand(vec dst, vec src) %{
        // Widening VectorReinterpret for the non-AVX case (UseAVX == 0).
        // dst is first loaded with a mask constant matching the source width
        // and then ANDed with src; dst is TEMP because it is written before
        // src is read. NOTE(review): presumably this clears every byte above
        // the source width - confirm against the mask constants.
18297   predicate(UseAVX == 0 &&
18298             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18299   match(Set dst (VectorReinterpret src));
18300   ins_cost(125);
18301   effect(TEMP dst);
18302   format %{ "vector_reinterpret_expand $dst,$src" %}
18303   ins_encode %{
          // Only 4- or 8-byte sources widened to at most 16 bytes are handled.
18304     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18305     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18306 
18307     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18308     if (src_vlen_in_bytes == 4) {
18309       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18310     } else {
18311       assert(src_vlen_in_bytes == 8, "");
18312       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18313     }
18314     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18315   %}
18316   ins_pipe( pipe_slow );
18317 %}
18318 
18319 instruct vreinterpret_expand4(legVec dst, vec src) %{
        // Widening VectorReinterpret of a 4-byte non-mask vector when AVX is
        // available: a single vpand of src with the vector_32_bit_mask()
        // constant writes the result into dst.
18320   predicate(UseAVX > 0 &&
18321             !n->bottom_type()->isa_pvectmask() &&
18322             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18323             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18324   match(Set dst (VectorReinterpret src));
18325   ins_cost(125);
18326   format %{ "vector_reinterpret_expand $dst,$src" %}
18327   ins_encode %{
          // The AND is always performed at 128-bit vector length.
18328     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), Assembler::AVX_128bit, noreg);
18329   %}
18330   ins_pipe( pipe_slow );
18331 %}
18332 
18333 
18334 instruct vreinterpret_expand(legVec dst, vec src) %{
        // Widening VectorReinterpret of a non-mask vector wider than 4 bytes
        // when AVX is available. The move instruction is chosen by the
        // source size, so exactly the source bytes are copied into dst.
18335   predicate(UseAVX > 0 &&
18336             !n->bottom_type()->isa_pvectmask() &&
18337             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18338             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18339   match(Set dst (VectorReinterpret src));
18340   ins_cost(125);
18341   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18342   ins_encode %{
          // Dispatch on the SOURCE size (8, 16 or 32 bytes); anything else is a bug.
18343     switch (Matcher::vector_length_in_bytes(this, $src)) {
18344       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18345       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18346       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18347       default: ShouldNotReachHere();
18348     }
18349   %}
18350   ins_pipe( pipe_slow );
18351 %}
18352 
18353 instruct reinterpret_shrink(vec dst, legVec src) %{
        // Narrowing VectorReinterpret of a non-mask vector: copies only as
        // many bytes as the (smaller) result type holds.
18354   predicate(!n->bottom_type()->isa_pvectmask() &&
18355             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18356   match(Set dst (VectorReinterpret src));
18357   ins_cost(125);
18358   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18359   ins_encode %{
          // Dispatch on the RESULT size (4, 8, 16 or 32 bytes); anything else is a bug.
18360     switch (Matcher::vector_length_in_bytes(this)) {
18361       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18362       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18363       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18364       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18365       default: ShouldNotReachHere();
18366     }
18367   %}
18368   ins_pipe( pipe_slow );
18369 %}
18370 
18371 // ----------------------------------------------------------------------------------------------------
18372 
18373 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
        // Scalar double rounding (RoundDoubleMode) with an immediate rounding
        // mode, register source. Asserts UseSSE >= 4.
18374   match(Set dst (RoundDoubleMode src rmode));
18375   format %{ "roundsd $dst,$src" %}
18376   ins_cost(150);
18377   ins_encode %{
18378     assert(UseSSE >= 4, "required");
          // Without AVX, dst is zeroed first when it differs from src.
          // NOTE(review): presumably this avoids a dependency on the stale
          // contents of dst - confirm.
18379     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18380       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18381     }
18382     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18383   %}
18384   ins_pipe(pipe_slow);
18385 %}
18386 
18387 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
        // Scalar double rounding of a constant: the double is read from the
        // constant table and used directly as the roundsd memory operand.
        // Asserts UseSSE >= 4.
18388   match(Set dst (RoundDoubleMode con rmode));
18389   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18390   ins_cost(150);
18391   ins_encode %{
18392     assert(UseSSE >= 4, "required");
18393     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18394   %}
18395   ins_pipe(pipe_slow);
18396 %}
18397 
18398 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
        // Packed double rounding (RoundDoubleModeV) for vectors of fewer than
        // 8 lanes, register source. Uses vroundpd and asserts UseAVX > 0.
18399   predicate(Matcher::vector_length(n) < 8);
18400   match(Set dst (RoundDoubleModeV src rmode));
18401   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18402   ins_encode %{
18403     assert(UseAVX > 0, "required");
18404     int vlen_enc = vector_length_encoding(this);
18405     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18406   %}
18407   ins_pipe( pipe_slow );
18408 %}
18409 
18410 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
        // Packed double rounding for exactly 8 lanes, register source.
        // Uses vrndscalepd at 512-bit length and asserts UseAVX > 2.
18411   predicate(Matcher::vector_length(n) == 8);
18412   match(Set dst (RoundDoubleModeV src rmode));
18413   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18414   ins_encode %{
18415     assert(UseAVX > 2, "required");
18416     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18417   %}
18418   ins_pipe( pipe_slow );
18419 %}
18420 
18421 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
        // Packed double rounding for vectors of fewer than 8 lanes with the
        // LoadVector folded into the vroundpd memory operand. Asserts UseAVX > 0.
18422   predicate(Matcher::vector_length(n) < 8);
18423   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18424   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18425   ins_encode %{
18426     assert(UseAVX > 0, "required");
18427     int vlen_enc = vector_length_encoding(this);
18428     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18429   %}
18430   ins_pipe( pipe_slow );
18431 %}
18432 
18433 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
        // Packed double rounding for exactly 8 lanes with the LoadVector
        // folded into the vrndscalepd memory operand (512-bit length).
        // Asserts UseAVX > 2.
18434   predicate(Matcher::vector_length(n) == 8);
18435   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18436   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18437   ins_encode %{
18438     assert(UseAVX > 2, "required");
18439     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18440   %}
18441   ins_pipe( pipe_slow );
18442 %}
18443 
18444 instruct onspinwait() %{
        // Matches the OnSpinWait node (no operands) and emits a single
        // pause instruction.
18445   match(OnSpinWait);
18446   ins_cost(200);
18447 
18448   format %{
18449     $$template
18450     $$emit$$"pause\t! membar_onspinwait"
18451   %}
18452   ins_encode %{
18453     __ pause();
18454   %}
18455   ins_pipe(pipe_slow);
18456 %}
18457 
18458 // Fused multiply-add, double: c = a * b + c
18459 instruct fmaD_reg(regD a, regD b, regD c) %{
        // The addend c is also the result register (matched as "Set c").
        // UseFMA is only asserted here, not checked by a predicate.
18460   match(Set c (FmaD  c (Binary a b)));
18461   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18462   ins_cost(150);
18463   ins_encode %{
18464     assert(UseFMA, "Needs FMA instructions support.");
18465     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18466   %}
18467   ins_pipe( pipe_slow );
18468 %}
18469 
18470 // Fused multiply-add, float: c = a * b + c
18471 instruct fmaF_reg(regF a, regF b, regF c) %{
        // The addend c is also the result register (matched as "Set c").
        // UseFMA is only asserted here, not checked by a predicate.
18472   match(Set c (FmaF  c (Binary a b)));
18473   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18474   ins_cost(150);
18475   ins_encode %{
18476     assert(UseFMA, "Needs FMA instructions support.");
18477     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18478   %}
18479   ins_pipe( pipe_slow );
18480 %}
18481 
18482 // ====================VECTOR INSTRUCTIONS=====================================
18483 
18484 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18485 instruct MoveVec2Leg(legVec dst, vec src) %{
        // Placeholder move from the vec to the legVec operand class.
        // Its encoding is ShouldNotReachHere(), so the node must have been
        // eliminated before code emission.
18486   match(Set dst src);
18487   format %{ "" %}
18488   ins_encode %{
18489     ShouldNotReachHere();
18490   %}
18491   ins_pipe( fpu_reg_reg );
18492 %}
18493 
18494 instruct MoveLeg2Vec(vec dst, legVec src) %{
        // Placeholder move from the legVec to the vec operand class.
        // Its encoding is ShouldNotReachHere(), so the node must have been
        // eliminated before code emission.
18495   match(Set dst src);
18496   format %{ "" %}
18497   ins_encode %{
18498     ShouldNotReachHere();
18499   %}
18500   ins_pipe( fpu_reg_reg );
18501 %}
18502 
18503 // ============================================================================
18504 
18505 // Load vectors generic operand pattern
18506 instruct loadV(vec dst, memory mem) %{
        // Generic vector load for every LoadVector node: load_vector() is
        // given the element type and the vector size in bytes and picks the
        // actual instruction.
18507   match(Set dst (LoadVector mem));
18508   ins_cost(125);
18509   format %{ "load_vector $dst,$mem" %}
18510   ins_encode %{
18511     BasicType bt = Matcher::vector_element_basic_type(this);
18512     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18513   %}
18514   ins_pipe( pipe_slow );
18515 %}
18516 
18517 // Store vectors generic operand pattern.
18518 instruct storeV(memory mem, vec src) %{
        // Generic vector store for every StoreVector node. The store
        // instruction is selected by the byte size of the source vector.
18519   match(Set mem (StoreVector mem src));
18520   ins_cost(145);
18521   format %{ "store_vector $mem,$src" %}
18522   ins_encode %{
          // 4/8/16/32/64-byte vectors are supported; any other size is a bug.
18523     switch (Matcher::vector_length_in_bytes(this, $src)) {
18524       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18525       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18526       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18527       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18528       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18529       default: ShouldNotReachHere();
18530     }
18531   %}
18532   ins_pipe( pipe_slow );
18533 %}
18534 
18535 // ---------------------------------------- Gather ------------------------------------
18536 
18537 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18538 
18539 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
        // Unmasked gather (LoadVectorGather) of non-subword elements for
        // vectors of at most 32 bytes when AVX512VL is not available.
        // Uses a vector register as the gather mask.
18540   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18541             Matcher::vector_length_in_bytes(n) <= 32);
18542   match(Set dst (LoadVectorGather mem idx));
18543   effect(TEMP dst, TEMP tmp, TEMP mask);
18544   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18545   ins_encode %{
18546     int vlen_enc = vector_length_encoding(this);
18547     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18548     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
          // Comparing mask with itself for equality sets every bit, i.e. all lanes are enabled.
18549     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
          // Materialize the base address in tmp for the gather helper.
18550     __ lea($tmp$$Register, $mem$$Address);
18551     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18552   %}
18553   ins_pipe( pipe_slow );
18554 %}
18555 
18556 
18557 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
        // Unmasked gather (LoadVectorGather) of non-subword elements using an
        // opmask register. Selected when AVX512VL is available or the vector
        // is 64 bytes wide.
18558   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18559             !is_subword_type(Matcher::vector_element_basic_type(n)));
18560   match(Set dst (LoadVectorGather mem idx));
18561   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18562   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18563   ins_encode %{
18564     int vlen_enc = vector_length_encoding(this);
18565     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // XNOR of ktmp with itself yields all ones: every lane is enabled.
18566     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
          // Materialize the base address in tmp for the gather helper.
18567     __ lea($tmp$$Register, $mem$$Address);
18568     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18569   %}
18570   ins_pipe( pipe_slow );
18571 %}
18572 
18573 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
        // Masked gather (LoadVectorGatherMasked) of non-subword elements with
        // an opmask. Selected when AVX512VL is available or the vector is
        // 64 bytes wide.
18574   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18575             !is_subword_type(Matcher::vector_element_basic_type(n)));
18576   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18577   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18578   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18579   ins_encode %{
18580     assert(UseAVX > 2, "sanity");
18581     int vlen_enc = vector_length_encoding(this);
18582     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18583     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18584     // Note: The gather instruction partially updates the opmask register used
18585     // for predication, hence the mask operand is first copied to a temporary.
18586     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
          // Zero dst up front (presumably so that lanes the gather does not
          // write read as zero - confirm).
18587     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18588     __ lea($tmp$$Register, $mem$$Address);
18589     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18590   %}
18591   ins_pipe( pipe_slow );
18592 %}
18593 
18594 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
        // Unmasked gather of subword elements for vectors of at most 8 bytes.
        // Here the index operand is a pointer register (idx_base), not an
        // index vector; the work is delegated to vgather8b().
18595   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18596   match(Set dst (LoadVectorGather mem idx_base));
18597   effect(TEMP tmp, TEMP rtmp);
18598   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18599   ins_encode %{
18600     int vlen_enc = vector_length_encoding(this);
18601     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // Materialize the base address in tmp for the helper.
18602     __ lea($tmp$$Register, $mem$$Address);
18603     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18604   %}
18605   ins_pipe( pipe_slow );
18606 %}
18607 
18608 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18609                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
        // Unmasked gather of subword elements for vectors wider than 8 bytes.
        // Delegates to vgather_subword(), passing noreg for its mask and
        // mask-index arguments. Kills the flags register.
18610   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18611   match(Set dst (LoadVectorGather mem idx_base));
18612   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18613   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18614   ins_encode %{
18615     int vlen_enc = vector_length_encoding(this);
18616     int vector_len = Matcher::vector_length(this);
18617     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18618     __ lea($tmp$$Register, $mem$$Address);
          // The helper is handed a copy of the index pointer (presumably
          // because it modifies it, while idx_base is an input - confirm).
18619     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18620     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18621                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18622   %}
18623   ins_pipe( pipe_slow );
18624 %}
18625 
18626 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
        // Masked gather of subword elements for vectors of at most 8 bytes,
        // with the mask in an opmask register (requires AVX512BW).
        // Kills the flags register.
18627   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18628   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18629   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18630   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18631   ins_encode %{
18632     int vlen_enc = vector_length_encoding(this);
18633     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // Start the mask index at zero.
18634     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18635     __ lea($tmp$$Register, $mem$$Address);
          // Move the opmask bits into a general-purpose register for the helper.
18636     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18637     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18638   %}
18639   ins_pipe( pipe_slow );
18640 %}
18641 
18642 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18643                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
        // Masked gather of subword elements for vectors wider than 8 bytes,
        // with the mask in an opmask register (requires AVX512BW).
        // Kills the flags register.
18644   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18645   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18646   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18647   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18648   ins_encode %{
18649     int vlen_enc = vector_length_encoding(this);
18650     int vector_len = Matcher::vector_length(this);
18651     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // Start the mask index at zero.
18652     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18653     __ lea($tmp$$Register, $mem$$Address);
          // The helper is handed a copy of the index pointer (presumably
          // because it modifies it, while idx_base is an input - confirm).
18654     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
          // Move the opmask bits into a general-purpose register for the helper.
18655     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18656     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18657                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18658   %}
18659   ins_pipe( pipe_slow );
18660 %}
18661 
18662 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
        // Masked gather of subword elements for vectors of at most 8 bytes
        // when AVX512VL+BW is not available: the mask is a vector register and
        // is first condensed into a bit mask in rtmp2. Kills the flags register.
18663   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18664   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18665   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18666   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18667   ins_encode %{
18668     int vlen_enc = vector_length_encoding(this);
18669     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18670     __ lea($tmp$$Register, $mem$$Address);
          // Collect one mask bit per byte of the vector mask.
18671     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18672     if (elem_bt == T_SHORT) {
            // For shorts keep every other bit (selector 0x55555555) so that one
            // bit per element remains; mask_idx serves as scratch for the selector.
18673       __ movl($mask_idx$$Register, 0x55555555);
18674       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18675     }
          // Start the mask index at zero.
18676     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18677     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18678   %}
18679   ins_pipe( pipe_slow );
18680 %}
18681 
18682 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18683                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
        // Masked gather of subword elements for vectors wider than 8 bytes
        // when AVX512VL+BW is not available: the mask is a vector register and
        // is first condensed into a bit mask in rtmp2. Kills the flags register.
18684   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18685   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18686   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18687   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18688   ins_encode %{
18689     int vlen_enc = vector_length_encoding(this);
18690     int vector_len = Matcher::vector_length(this);
18691     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18692     __ lea($tmp$$Register, $mem$$Address);
          // The helper is handed a copy of the index pointer (presumably
          // because it modifies it, while idx_base is an input - confirm).
18693     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
          // Collect one mask bit per byte of the vector mask.
18694     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18695     if (elem_bt == T_SHORT) {
            // For shorts keep every other bit (selector 0x55555555) so that one
            // bit per element remains; mask_idx serves as scratch for the selector.
18696       __ movl($mask_idx$$Register, 0x55555555);
18697       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18698     }
          // Start the mask index at zero.
18699     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18700     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18701                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18702   %}
18703   ins_pipe( pipe_slow );
18704 %}
18705 
18706 // ====================Scatter=======================================
18707 
18708 // Scatter INT, LONG, FLOAT, DOUBLE
18709 
18710 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
        // Unmasked scatter (StoreVectorScatter) of non-subword elements;
        // requires UseAVX > 2. An all-ones opmask is loaded into ktmp so that
        // every lane is stored.
18711   predicate(UseAVX > 2);
18712   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18713   effect(TEMP tmp, TEMP ktmp);
18714   format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18715   ins_encode %{
          // Element type and length encoding come from the stored vector (src).
18716     int vlen_enc = vector_length_encoding(this, $src);
18717     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18718 
18719     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18720     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18721 
18722     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
          // Materialize the base address in tmp for the scatter helper.
18723     __ lea($tmp$$Register, $mem$$Address);
18724     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18725   %}
18726   ins_pipe( pipe_slow );
18727 %}
18728 
18729 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
        // Masked scatter (StoreVectorScatterMasked) of non-subword elements
        // with the mask in an opmask register. This rule has no predicate.
18730   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18731   effect(TEMP tmp, TEMP ktmp);
18732   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18733   ins_encode %{
          // Element type and length encoding come from the stored vector (src).
18734     int vlen_enc = vector_length_encoding(this, $src);
18735     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18736     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18737     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18738     // Note: The scatter instruction partially updates the opmask register used
18739     // for predication, hence the mask operand is first copied to a temporary.
18740     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18741     __ lea($tmp$$Register, $mem$$Address);
18742     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18743   %}
18744   ins_pipe( pipe_slow );
18745 %}
18746 
18747 // ====================REPLICATE=======================================
18748 
18749 // Replicate byte scalar to be vector
18750 instruct vReplB_reg(vec dst, rRegI src) %{
        // Broadcasts the byte held in a general-purpose register to every
        // lane of a T_BYTE vector. Three code shapes, chosen at emit time:
        //   - AVX-512 broadcast straight from the GPR (64 lanes or AVX512VL+BW),
        //   - AVX2 broadcast after moving the scalar into dst,
        //   - a pre-AVX2 unpack/shuffle sequence.
18751   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18752   match(Set dst (Replicate src));
18753   format %{ "replicateB $dst,$src" %}
18754   ins_encode %{
18755     uint vlen = Matcher::vector_length(this);
18756     if (UseAVX >= 2) {
18757       int vlen_enc = vector_length_encoding(this);
18758       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18759         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18760         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18761       } else {
              // Move the scalar into dst first, then broadcast from its low byte.
18762         __ movdl($dst$$XMMRegister, $src$$Register);
18763         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18764       }
18765     } else {
18766        assert(UseAVX < 2, "");
            // Build the replicated pattern in the low 8 bytes by unpacking
            // and shuffling; a 16-lane vector additionally copies the low
            // quadword into the high one.
18767       __ movdl($dst$$XMMRegister, $src$$Register);
18768       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18769       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18770       if (vlen >= 16) {
18771         assert(vlen == 16, "");
18772         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18773       }
18774     }
18775   %}
18776   ins_pipe( pipe_slow );
18777 %}
18778 
18779 instruct ReplB_mem(vec dst, memory mem) %{
        // Broadcasts a byte to every lane of a T_BYTE vector with the LoadB
        // folded into the vpbroadcastb memory operand. Requires UseAVX >= 2.
18780   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18781   match(Set dst (Replicate (LoadB mem)));
18782   format %{ "replicateB $dst,$mem" %}
18783   ins_encode %{
18784     int vlen_enc = vector_length_encoding(this);
18785     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18786   %}
18787   ins_pipe( pipe_slow );
18788 %}
18789 
18790 // ====================ReplicateS=======================================
18791 
18792 instruct vReplS_reg(vec dst, rRegI src) %{
        // Broadcasts the short held in a general-purpose register to every
        // lane of a T_SHORT vector. Three code shapes, chosen at emit time:
        //   - AVX-512 broadcast straight from the GPR (32 lanes or AVX512VL+BW),
        //   - AVX2 broadcast after moving the scalar into dst,
        //   - a pre-AVX2 shuffle sequence.
18793   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18794   match(Set dst (Replicate src));
18795   format %{ "replicateS $dst,$src" %}
18796   ins_encode %{
18797     uint vlen = Matcher::vector_length(this);
18798     int vlen_enc = vector_length_encoding(this);
18799     if (UseAVX >= 2) {
18800       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18801         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18802         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18803       } else {
              // Move the scalar into dst first, then broadcast from its low word.
18804         __ movdl($dst$$XMMRegister, $src$$Register);
18805         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18806       }
18807     } else {
18808       assert(UseAVX < 2, "");
            // Replicate the low word across the low quadword; an 8-lane
            // vector additionally copies the low quadword into the high one.
18809       __ movdl($dst$$XMMRegister, $src$$Register);
18810       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18811       if (vlen >= 8) {
18812         assert(vlen == 8, "");
18813         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18814       }
18815     }
18816   %}
18817   ins_pipe( pipe_slow );
18818 %}
18819 
// Replicate a half-float immediate into every 16-bit lane. The constant is
// first loaded into the integer temp and then broadcast from that register.
// This rule has no predicate; the encoding asserts AVX512-FP16 support and a
// T_SHORT element type.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && elem_bt == T_SHORT, "");
    Register scratch = $rtmp$$Register;
    __ movl(scratch, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, scratch, enc);
  %}
  ins_pipe( pipe_slow );
%}
18833 
// Replicate a half-float held in a float register into every 16-bit lane.
// Requires AVX512-FP16; the value travels through the integer temp.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    Register scratch = $rtmp$$Register;
    // Copy the value from the float register into the integer temp, then
    // broadcast from the temp.
    __ evmovw(scratch, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, scratch, enc);
  %}
  ins_pipe( pipe_slow );
%}
18846 
// Replicate a short loaded from memory into every 16-bit lane. Only selected
// when UseAVX >= 2; the LoadS is folded into the broadcast.
instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    // The length encoding is derived from this node's vector size.
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18857 
18858 // ====================ReplicateI=======================================
18859 
// Replicate a 32-bit general register into every int lane of the destination.
instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    Register scalar = $src$$Register;
    const uint num_lanes = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    // 16 lanes, or AVX512VL available: broadcast straight from the general register.
    const bool broadcast_from_gpr = (num_lanes == 16) || VM_Version::supports_avx512vl();
    if (broadcast_from_gpr) {
      __ evpbroadcastd(xdst, scalar, enc);
    } else {
      // Both remaining strategies start by moving the scalar into the vector register.
      __ movdl(xdst, scalar);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastd(xdst, xdst, enc);
      } else {
        __ pshufd(xdst, xdst, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18879 
// Replicate an int loaded from memory into every int lane. The instruction
// chosen depends on the best available feature level: AVX2, then AVX, then
// a plain load followed by a shuffle.
instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    const int enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd(xdst, $mem$$Address, enc);
    } else if (VM_Version::supports_avx()) {
      // The float broadcast is used here for the int payload.
      __ vbroadcastss(xdst, $mem$$Address, enc);
    } else {
      // Load the scalar, then shuffle element 0 into all four positions.
      __ movdl(xdst, $mem$$Address);
      __ pshufd(xdst, xdst, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}
18897 
// Replicate an int immediate into a byte/short/int vector by loading a
// pre-replicated constant. The last argument to vreplicate_imm is a byte
// budget (4 with AVX, 8 with SSE3 only, otherwise 16) divided by the element
// size.
instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    // The constant expression is spelled out in full and deliberately does not
    // refer to the locals declared after it.
    InternalAddress table_entry = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                                   type2aelembytes(Matcher::vector_element_basic_type(this))));
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(elem_bt, $dst$$XMMRegister, table_entry, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18912 
// Replicate the int constant zero into a byte/short/int vector by XOR-ing the
// destination with itself.
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    const int enc = vector_length_encoding(this);
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor(xdst, xdst);
    } else {
      // EVEX without AVX512VL: use the three-operand form with an explicit length.
      __ vpxor(xdst, xdst, xdst, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18928 
// Replicate the int constant -1 into a byte/short/int vector, i.e. produce an
// all-ones vector via the vallones helper.
instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    __ vallones($dst$$XMMRegister, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18939 
18940 // ====================ReplicateL=======================================
18941 
// Replicate a 64-bit general register into every long lane of the destination.
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    Register scalar = $src$$Register;
    const int num_lanes = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    // 8 lanes, or AVX512VL available: broadcast straight from the general register.
    const bool broadcast_from_gpr = (num_lanes == 8) || VM_Version::supports_avx512vl();
    if (broadcast_from_gpr) {
      __ evpbroadcastq(xdst, scalar, enc);
    } else {
      // Both remaining strategies start by moving the scalar into the vector register.
      __ movdq(xdst, scalar);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastq(xdst, xdst, enc);
      } else {
        __ punpcklqdq(xdst, xdst);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18962 
// Replicate a long loaded from memory into every long lane. The instruction
// chosen depends on the best available feature level: AVX2, then SSE3, then
// a plain load followed by an unpack.
instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    const int enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq(xdst, $mem$$Address, enc);
    } else if (VM_Version::supports_sse3()) {
      __ movddup(xdst, $mem$$Address);
    } else {
      // Load the scalar, then unpack the register with itself.
      __ movq(xdst, $mem$$Address);
      __ punpcklqdq(xdst, xdst);
    }
  %}
  ins_pipe( pipe_slow );
%}
18980 
// Replicate a long (8 byte) immediate into a vector by loading a
// pre-replicated constant. vreplicate_imm is asked for 1 element with SSE3
// and 2 without.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    InternalAddress table_entry = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, table_entry, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18993 
// Replicate the long constant zero into a vector by XOR-ing the destination
// with itself.
instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    const int enc = vector_length_encoding(this);
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor(xdst, xdst);
    } else {
      // EVEX without AVX512VL: use the three-operand form with an explicit length.
      __ vpxor(xdst, xdst, xdst, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19008 
// Replicate the long constant -1 into a vector, i.e. produce an all-ones
// vector via the vallones helper.
instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    __ vallones($dst$$XMMRegister, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
19019 
19020 // ====================ReplicateF=======================================
19021 
// Replicate a float register into every float lane (UseAVX > 0 variant).
instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    XMMRegister xsrc = $src$$XMMRegister;
    const uint num_lanes = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    const bool wider_than_128 = num_lanes > 4;
    if (wider_than_128 && VM_Version::supports_avx2()) {
      __ vbroadcastss(xdst, xsrc, enc); // reg-to-reg variant requires AVX2
    } else {
      // Without AVX2 the only wide shape expected here is 8 lanes.
      assert(!wider_than_128 || num_lanes == 8, "sanity");
      // Fill the low 128 bits with element 0 ...
      __ vpermilps(xdst, xsrc, 0x00, Assembler::AVX_128bit);
      if (wider_than_128) {
        // ... and mirror them into the upper half.
        __ vinsertf128_high(xdst, xdst);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
19041 
// Replicate a float register into every float lane (UseAVX == 0 variant):
// a single shuffle with selector 0x00.
instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    XMMRegister xsrc = $src$$XMMRegister;
    __ pshufd(xdst, xsrc, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
19051 
// Replicate a float loaded from memory into every float lane. Only selected
// when UseAVX > 0; the LoadF is folded into the broadcast.
instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    // The length encoding is derived from this node's vector size.
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
19062 
// Replicate a float immediate into a vector by loading a pre-replicated
// constant. vreplicate_imm is asked for 1 element with AVX, 2 with SSE3 only,
// and 4 otherwise.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    InternalAddress table_entry = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                                           VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, table_entry, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
19076 
// Replicate the float constant zero into a vector by XOR-ing the destination
// with itself.
instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    const int enc = vector_length_encoding(this);
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vldq()) {
      __ xorps(xdst, xdst);
    } else {
      // EVEX without AVX512VL+DQ: use the integer XOR with an explicit length.
      __ vpxor(xdst, xdst, xdst, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19091 
19092 // ====================ReplicateD=======================================
19093 
// Replicate a double register into every double lane (UseSSE >= 3 variant).
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    XMMRegister xsrc = $src$$XMMRegister;
    const uint num_lanes = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    const bool wider_than_128 = num_lanes > 2;
    if (wider_than_128 && VM_Version::supports_avx2()) {
      __ vbroadcastsd(xdst, xsrc, enc); // reg-to-reg variant requires AVX2
    } else {
      // Without AVX2 the only wide shape expected here is 4 lanes.
      assert(!wider_than_128 || num_lanes == 4, "sanity");
      // Duplicate the low double within the low 128 bits ...
      __ movddup(xdst, xsrc);
      if (wider_than_128) {
        // ... and mirror them into the upper half.
        __ vinsertf128_high(xdst, xdst);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
19114 
// Replicate a double register into every double lane (UseSSE < 3 variant):
// a single shuffle with selector 0x44.
instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    XMMRegister xsrc = $src$$XMMRegister;
    __ pshufd(xdst, xsrc, 0x44);
  %}
  ins_pipe( pipe_slow );
%}
19124 
// Replicate a double loaded from memory into every double lane
// (UseSSE >= 3 only). Vectors with fewer than 4 lanes use movddup; 4 or more
// lanes use the length-encoded broadcast.
instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    if (Matcher::vector_length(this) < 4) {
      __ movddup(xdst, $mem$$Address);
    } else {
      const int enc = vector_length_encoding(this);
      __ vbroadcastsd(xdst, $mem$$Address, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
19139 
// Replicate a double (8 byte) immediate into a vector by loading a
// pre-replicated constant. vreplicate_imm is asked for 1 element with SSE3
// and 2 without.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    InternalAddress table_entry = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, table_entry, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
19152 
// Replicate the double constant zero into a vector by XOR-ing the destination
// with itself.
instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    const int enc = vector_length_encoding(this);
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vldq()) {
      __ xorps(xdst, xdst);
    } else {
      // EVEX without AVX512VL+DQ: use the integer XOR with an explicit length.
      __ vpxor(xdst, xdst, xdst, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19167 
19168 // ====================VECTOR INSERT=======================================
19169 
// Insert a general-register value into lane idx of an integral vector shorter
// than 32 bytes. The destination is updated in place (dst is also the input
// vector in the match rule).
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    const BasicType lane_type = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(lane_type), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // The element-type-dispatching helper emits the actual insert.
    __ insert(lane_type, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19187 
// Insert a general-register value into lane idx of a 32-byte integral vector.
// The 128-bit half containing the lane is extracted into vtmp, patched there,
// and written back over a copy of src to form dst.
instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Number of elements in one 128-bit chunk, and its base-2 logarithm.
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: element position inside the 128-bit chunk; y_idx: which chunk (0 or 1).
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19210 
// Insert a general-register value into lane idx of a 64-byte integral vector.
// The 128-bit chunk containing the lane is extracted into vtmp, patched
// there, and written back over a copy of src to form dst.
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    const BasicType lane_type = Matcher::vector_element_basic_type(this);
    // Number of elements in one 128-bit chunk, and its base-2 logarithm.
    const int elems_per_chunk = 16/type2aelembytes(lane_type);
    const int chunk_shift = log2(elems_per_chunk);

    assert(is_integral_type(lane_type), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    XMMRegister xdst = $dst$$XMMRegister;
    XMMRegister xsrc = $src$$XMMRegister;
    XMMRegister chunk = $vtmp$$XMMRegister;
    // Low index bits: position inside the chunk; next two bits: which of the four chunks.
    const uint pos_in_chunk = $idx$$constant & right_n_bits(chunk_shift);
    const uint chunk_sel = ($idx$$constant >> chunk_shift) & 3;
    __ vextracti32x4(chunk, xsrc, chunk_sel);
    __ vinsert(lane_type, chunk, chunk, $val$$Register, pos_in_chunk);
    __ vinserti32x4(xdst, xsrc, chunk, chunk_sel);
  %}
  ins_pipe( pipe_slow );
%}
19234 
// Insert a 64-bit general register into lane idx of a 2-lane long vector,
// updating the destination in place.
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    XMMRegister xdst = $dst$$XMMRegister;
    Register scalar = $val$$Register;
    __ pinsrq(xdst, scalar, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19248 
// Insert a 64-bit general register into lane idx of a 4-lane long vector.
// The 128-bit half containing the lane is extracted into vtmp, patched there,
// and written back over a copy of src to form dst.
instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: position inside the 128-bit half (bit 0); y_idx: which half (bit 1).
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19267 
// Insert a 64-bit general register into lane idx of an 8-lane long vector.
// The 128-bit chunk containing the lane is extracted into vtmp, patched
// there, and written back over a copy of src to form dst.
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    XMMRegister xdst = $dst$$XMMRegister;
    XMMRegister xsrc = $src$$XMMRegister;
    XMMRegister chunk = $vtmp$$XMMRegister;
    // Bit 0: position inside the chunk; bits 1-2: which of the four chunks.
    const uint pos_in_chunk = $idx$$constant & right_n_bits(1);
    const uint chunk_sel = ($idx$$constant >> 1) & 3;
    __ vextracti32x4(chunk, xsrc, chunk_sel);
    __ vpinsrq(chunk, chunk, $val$$Register, pos_in_chunk);
    __ vinserti32x4(xdst, xsrc, chunk, chunk_sel);
  %}
  ins_pipe( pipe_slow );
%}
19285 
// Insert a float register into lane idx of a float vector with fewer than
// 8 lanes, updating the destination in place.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    XMMRegister xdst = $dst$$XMMRegister;
    XMMRegister xval = $val$$XMMRegister;
    // Keep the low two index bits and shift them left by 4 to build the
    // insertps immediate.
    const uint lane_pos = $idx$$constant & right_n_bits(2);
    __ insertps(xdst, xval, lane_pos << 4);
  %}
  ins_pipe( pipe_slow );
%}
19301 
// Insert a float register into lane idx of a float vector with 8 or 16 lanes.
// The 128-bit chunk containing the lane is extracted into vtmp, patched
// there, and written back over a copy of src to form dst.
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    // x_idx: position inside the 128-bit chunk (low two index bits). It is
    // shifted left by 4 below to build the vinsertps immediate.
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      // Two chunks: one selector bit.
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      // Four chunks: two selector bits.
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
19329 
// Insert a double register into lane idx of a 2-lane double vector, updating
// the destination in place. The value is routed through a 64-bit general
// temp so the integer insert can be used.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    Register bits = $tmp$$Register;
    __ movq(bits, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, bits, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19345 
// Insert a double register into lane idx of a 4-lane double vector. The value
// is first moved into the 64-bit general temp; the 128-bit half containing
// the lane is then extracted into vtmp, patched there, and written back over
// a copy of src to form dst.
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: position inside the 128-bit half (bit 0); y_idx: which half (bit 1).
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19365 
// Insert a double register into lane idx of an 8-lane double vector. The
// value is first moved into the 64-bit general temp; the 128-bit chunk
// containing the lane is then extracted into vtmp, patched there, and written
// back over a copy of src to form dst.
// NOTE(review): idx is declared immI here while the sibling insert rules use
// immU8; left unchanged because it affects matching.
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: position inside the 128-bit chunk (bit 0); y_idx: which of the
    // four chunks (bits 1-2).
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19384 
19385 // ====================REDUCTION ARITHMETIC=======================================
19386 
19387 // =======================Int Reduction==========================================
19388 
// Reduce an int vector (src2) together with the scalar src1 into dst. One
// rule serves add, mul, and, or, xor, min, max and the unsigned min/max
// reductions; the ideal opcode tells reduceI which operation to emit.
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The lane count is taken from the vector input, not from this node.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceI(ideal_op, num_elems, $dst$$Register, $src1$$Register,
               $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19409 
19410 // =======================Long Reduction==========================================
19411 
// Reduce a long vector (src2) together with the scalar src1 into dst when
// AVX512DQ is not available. Vector operands are restricted to legVec. The
// ideal opcode tells reduceL which operation to emit.
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The lane count is taken from the vector input, not from this node.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceL(ideal_op, num_elems, $dst$$Register, $src1$$Register,
               $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19432 
// Reduce a long vector (src2) together with the scalar src1 into dst when
// AVX512DQ is available. Same encoding as the non-DQ rule, but the vector
// operands use the vec class instead of legVec.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The lane count is taken from the vector input, not from this node.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceL(ideal_op, num_elems, $dst$$Register, $src1$$Register,
               $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19453 
19454 // =======================Float Reduction==========================================
19455 
// Strictly ordered float add/mul reduction for vectors of at most 4 lanes.
// dst is both the scalar input and the result, and is also marked TEMP.
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    // The lane count is taken from the vector input, not from this node.
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19469 
// Strictly ordered float add/mul reduction for 8-lane vectors. dst is both
// the scalar input and the result; two vector temps are required.
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The lane count is taken from the vector input, not from this node.
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19483 
// Strictly ordered float add/mul reduction for 16-lane vectors. dst is both
// the scalar input and the result; vector operands are restricted to legVec.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The lane count is taken from the vector input, not from this node.
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19497 
19498 
19499 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19500   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19501   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19502   // src1 contains reduction identity
        // Applies to 2-element vectors only. The encoding below reads src2 and writes
        // dst; src1 is matched but never passed to the assembler helper. No vector
        // temporary is needed for this length.
19503   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19504   match(Set dst (AddReductionVF src1 src2));
19505   match(Set dst (MulReductionVF src1 src2));
19506   effect(TEMP dst);
19507   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19508   ins_encode %{
          // Forward the matched ideal opcode (Add vs. Mul) and the element count of src2.
19509     int opcode = this->ideal_Opcode();
19510     int vlen = Matcher::vector_length(this, $src2);
19511     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19512   %}
19513   ins_pipe( pipe_slow );
19514 %}
19515 
19516 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19517   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19518   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19519   // src1 contains reduction identity
        // Applies to 4-element vectors only and reserves one vector temporary.
        // src1 is matched but never passed to the assembler helper.
19520   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19521   match(Set dst (AddReductionVF src1 src2));
19522   match(Set dst (MulReductionVF src1 src2));
19523   effect(TEMP dst, TEMP vtmp);
19524   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19525   ins_encode %{
          // Forward the matched ideal opcode and the element count of src2.
19526     int opcode = this->ideal_Opcode();
19527     int vlen = Matcher::vector_length(this, $src2);
19528     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19529   %}
19530   ins_pipe( pipe_slow );
19531 %}
19532 
19533 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19534   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19535   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19536   // src1 contains reduction identity
        // Applies to 8-element vectors only and reserves two vector temporaries.
        // src1 is matched but never passed to the assembler helper.
19537   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19538   match(Set dst (AddReductionVF src1 src2));
19539   match(Set dst (MulReductionVF src1 src2));
19540   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19541   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19542   ins_encode %{
          // Forward the matched ideal opcode and the element count of src2.
19543     int opcode = this->ideal_Opcode();
19544     int vlen = Matcher::vector_length(this, $src2);
19545     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19546   %}
19547   ins_pipe( pipe_slow );
19548 %}
19549 
19550 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19551   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19552   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19553   // src1 contains reduction identity
        // Applies to 16-element vectors only; the vector operands use the legVec class
        // (NOTE(review): presumably a register-encoding restriction of the emitted
        // sequence -- confirm). src1 is matched but never passed to the helper.
19554   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19555   match(Set dst (AddReductionVF src1 src2));
19556   match(Set dst (MulReductionVF src1 src2));
19557   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19558   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19559   ins_encode %{
          // Forward the matched ideal opcode and the element count of src2.
19560     int opcode = this->ideal_Opcode();
19561     int vlen = Matcher::vector_length(this, $src2);
19562     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19563   %}
19564   ins_pipe( pipe_slow );
19565 %}
19566 
19567 // =======================Double Reduction==========================================
19568 
19569 instruct reduction2D(regD dst, vec src, vec vtmp) %{
        // Strictly ordered double add/mul reduction for vectors of exactly 2 elements.
        // dst appears on both sides of the match rules, so it carries the incoming
        // scalar and receives the result; one vector temporary (vtmp) is reserved.
19570   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19571   match(Set dst (AddReductionVD dst src));
19572   match(Set dst (MulReductionVD dst src));
19573   effect(TEMP dst, TEMP vtmp);
19574   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19575   ins_encode %{
          // Forward the matched ideal opcode (Add vs. Mul) and the element count of src.
19576     int opcode = this->ideal_Opcode();
19577     int vlen = Matcher::vector_length(this, $src);
19578     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19579   %}
19580   ins_pipe( pipe_slow );
19581 %}
19582 
19583 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
        // Strictly ordered double add/mul reduction for vectors of exactly 4 elements.
        // dst is the accumulator (input and output); two vector temporaries are reserved.
19584   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19585   match(Set dst (AddReductionVD dst src));
19586   match(Set dst (MulReductionVD dst src));
19587   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19588   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19589   ins_encode %{
          // Forward the matched ideal opcode and the element count of src.
19590     int opcode = this->ideal_Opcode();
19591     int vlen = Matcher::vector_length(this, $src);
19592     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19593   %}
19594   ins_pipe( pipe_slow );
19595 %}
19596 
19597 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
        // Strictly ordered double add/mul reduction for vectors of exactly 8 elements.
        // NOTE(review): vector operands use legVec instead of vec -- presumably a
        // register-encoding restriction of the emitted sequence; confirm in reduce_fp.
19598   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19599   match(Set dst (AddReductionVD dst src));
19600   match(Set dst (MulReductionVD dst src));
19601   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19602   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19603   ins_encode %{
          // Forward the matched ideal opcode and the element count of src.
19604     int opcode = this->ideal_Opcode();
19605     int vlen = Matcher::vector_length(this, $src);
19606     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19607   %}
19608   ins_pipe( pipe_slow );
19609 %}
19610 
19611 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19612   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19613   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19614   // src1 contains reduction identity
        // Applies to 2-element vectors only. The encoding below reads src2 and writes
        // dst; src1 is matched but never passed to the assembler helper. No vector
        // temporary is needed for this length.
19615   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19616   match(Set dst (AddReductionVD src1 src2));
19617   match(Set dst (MulReductionVD src1 src2));
19618   effect(TEMP dst);
19619   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19620   ins_encode %{
          // Forward the matched ideal opcode (Add vs. Mul) and the element count of src2.
19621     int opcode = this->ideal_Opcode();
19622     int vlen = Matcher::vector_length(this, $src2);
19623     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19624   %}
19625   ins_pipe( pipe_slow );
19626 %}
19627 
19628 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19629   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19630   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19631   // src1 contains reduction identity
        // Applies to 4-element vectors only and reserves one vector temporary.
        // src1 is matched but never passed to the assembler helper.
19632   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19633   match(Set dst (AddReductionVD src1 src2));
19634   match(Set dst (MulReductionVD src1 src2));
19635   effect(TEMP dst, TEMP vtmp);
19636   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19637   ins_encode %{
          // Forward the matched ideal opcode and the element count of src2.
19638     int opcode = this->ideal_Opcode();
19639     int vlen = Matcher::vector_length(this, $src2);
19640     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19641   %}
19642   ins_pipe( pipe_slow );
19643 %}
19644 
19645 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19646   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19647   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19648   // src1 contains reduction identity
        // Applies to 8-element vectors only; the vector operands use the legVec class
        // (NOTE(review): presumably a register-encoding restriction of the emitted
        // sequence -- confirm). src1 is matched but never passed to the helper.
19649   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19650   match(Set dst (AddReductionVD src1 src2));
19651   match(Set dst (MulReductionVD src1 src2));
19652   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19653   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19654   ins_encode %{
          // Forward the matched ideal opcode and the element count of src2.
19655     int opcode = this->ideal_Opcode();
19656     int vlen = Matcher::vector_length(this, $src2);
19657     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19658   %}
19659   ins_pipe( pipe_slow );
19660 %}
19661 
19662 // =======================Byte Reduction==========================================
19663 
19664 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
        // Byte-element reduction used when the CPU does not report AVX512BW.
        // Covers add, and/or/xor and signed/unsigned min/max; the scalar input src1
        // and the result dst are general-purpose registers, the vector input is src2.
        // Vector operands are taken from the legVec class in this variant.
19665   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19666   match(Set dst (AddReductionVI src1 src2));
19667   match(Set dst (AndReductionV  src1 src2));
19668   match(Set dst ( OrReductionV  src1 src2));
19669   match(Set dst (XorReductionV  src1 src2));
19670   match(Set dst (MinReductionV  src1 src2));
19671   match(Set dst (MaxReductionV  src1 src2));
19672   match(Set dst (UMinReductionV  src1 src2));
19673   match(Set dst (UMaxReductionV  src1 src2));
19674   effect(TEMP vtmp1, TEMP vtmp2);
19675   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19676   ins_encode %{
          // The ideal opcode identifies which of the matched reductions to emit.
19677     int opcode = this->ideal_Opcode();
19678     int vlen = Matcher::vector_length(this, $src2);
19679     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19680   %}
19681   ins_pipe( pipe_slow );
19682 %}
19683 
19684 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
        // Byte-element reduction used when the CPU reports AVX512BW.
        // Matches the same operations and emits the same reduceB call as the
        // non-AVX512BW rule; the only difference is that the vector operands come
        // from the vec class rather than legVec.
19685   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19686   match(Set dst (AddReductionVI src1 src2));
19687   match(Set dst (AndReductionV  src1 src2));
19688   match(Set dst ( OrReductionV  src1 src2));
19689   match(Set dst (XorReductionV  src1 src2));
19690   match(Set dst (MinReductionV  src1 src2));
19691   match(Set dst (MaxReductionV  src1 src2));
19692   match(Set dst (UMinReductionV  src1 src2));
19693   match(Set dst (UMaxReductionV  src1 src2));
19694   effect(TEMP vtmp1, TEMP vtmp2);
19695   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19696   ins_encode %{
          // The ideal opcode identifies which of the matched reductions to emit.
19697     int opcode = this->ideal_Opcode();
19698     int vlen = Matcher::vector_length(this, $src2);
19699     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19700   %}
19701   ins_pipe( pipe_slow );
19702 %}
19703 
19704 // =======================Short Reduction==========================================
19705 
19706 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
        // Short-element reduction. Unlike the byte rules, this single rule also
        // matches the multiply reduction and has no CPU-feature condition.
        // src1 is the scalar input and dst the scalar result (both general-purpose
        // registers); src2 is the vector input.
19707   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19708   match(Set dst (AddReductionVI src1 src2));
19709   match(Set dst (MulReductionVI src1 src2));
19710   match(Set dst (AndReductionV  src1 src2));
19711   match(Set dst ( OrReductionV  src1 src2));
19712   match(Set dst (XorReductionV  src1 src2));
19713   match(Set dst (MinReductionV  src1 src2));
19714   match(Set dst (MaxReductionV  src1 src2));
19715   match(Set dst (UMinReductionV  src1 src2));
19716   match(Set dst (UMaxReductionV  src1 src2));
19717   effect(TEMP vtmp1, TEMP vtmp2);
19718   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19719   ins_encode %{
          // The ideal opcode identifies which of the matched reductions to emit.
19720     int opcode = this->ideal_Opcode();
19721     int vlen = Matcher::vector_length(this, $src2);
19722     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19723   %}
19724   ins_pipe( pipe_slow );
19725 %}
19726 
19727 // =======================Mul Reduction==========================================
19728 
19729 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
        // Multiply reduction over byte vectors of at most 32 elements.
        // In contrast to the other byte reductions, dst is declared TEMP here in
        // addition to the two vector temporaries.
19730   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19731             Matcher::vector_length(n->in(2)) <= 32); // src2
19732   match(Set dst (MulReductionVI src1 src2));
19733   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19734   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19735   ins_encode %{
          // Only MulReductionVI is matched, so opcode is always that ideal opcode.
19736     int opcode = this->ideal_Opcode();
19737     int vlen = Matcher::vector_length(this, $src2);
19738     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19739   %}
19740   ins_pipe( pipe_slow );
19741 %}
19742 
19743 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
        // Multiply reduction over byte vectors of exactly 64 elements.
        // Emits the same mulreduceB call as the <= 32 element rule but takes its
        // vector operands from the legVec class (NOTE(review): presumably an
        // encoding restriction of the 64-element sequence -- confirm).
19744   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19745             Matcher::vector_length(n->in(2)) == 64); // src2
19746   match(Set dst (MulReductionVI src1 src2));
19747   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19748   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19749   ins_encode %{
          // Only MulReductionVI is matched, so opcode is always that ideal opcode.
19750     int opcode = this->ideal_Opcode();
19751     int vlen = Matcher::vector_length(this, $src2);
19752     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19753   %}
19754   ins_pipe( pipe_slow );
19755 %}
19756 
19757 //--------------------Min/Max Float Reduction --------------------
19758 // Float Min/Max Reduction
19759 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19760                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
        // Float min/max reduction of a 2-element vector on CPUs without AVX10.2,
        // for the case where the scalar input is an immediate: +Inf for Min, -Inf
        // for Max. The immediate src1 is matched but not passed to the encoder.
        // Reserves four vector temporaries and kills the flags register.
19761   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19762             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19763              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19764             Matcher::vector_length(n->in(2)) == 2);
19765   match(Set dst (MinReductionV src1 src2));
19766   match(Set dst (MaxReductionV src1 src2));
19767   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19768   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19769   ins_encode %{
19770     assert(UseAVX > 0, "sanity");
19771 
          // NOTE(review): the third argument is false here and true in the *_av
          // rules -- presumably it says whether dst holds a live input; confirm
          // against reduceFloatMinMax.
19772     int opcode = this->ideal_Opcode();
19773     int vlen = Matcher::vector_length(this, $src2);
19774     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19775                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19776   %}
19777   ins_pipe( pipe_slow );
19778 %}
19779 
19780 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19781                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
        // Float min/max reduction of vectors with 4 or more elements on CPUs
        // without AVX10.2, for the case where the scalar input is an immediate:
        // +Inf for Min, -Inf for Max. The immediate src1 is matched but not passed
        // to the encoder. Reserves five vector temporaries and kills the flags.
19782   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19783             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19784              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19785             Matcher::vector_length(n->in(2)) >= 4);
19786   match(Set dst (MinReductionV src1 src2));
19787   match(Set dst (MaxReductionV src1 src2));
19788   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19789   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19790   ins_encode %{
19791     assert(UseAVX > 0, "sanity");
19792 
          // Same helper as the 2-element rule, with one extra temporary (xmm_0).
19793     int opcode = this->ideal_Opcode();
19794     int vlen = Matcher::vector_length(this, $src2);
19795     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19796                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19797   %}
19798   ins_pipe( pipe_slow );
19799 %}
19800 
19801 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19802                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
        // Float min/max reduction of a 2-element vector on CPUs without AVX10.2,
        // accumulating form: dst is both the scalar input and the result, and the
        // predicate places no condition on its value.
19803   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19804             Matcher::vector_length(n->in(2)) == 2);
19805   match(Set dst (MinReductionV dst src));
19806   match(Set dst (MaxReductionV dst src));
19807   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19808   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19809   ins_encode %{
19810     assert(UseAVX > 0, "sanity");
19811 
          // Third argument is true in the accumulating rules (false in the
          // immediate-input rules).
19812     int opcode = this->ideal_Opcode();
19813     int vlen = Matcher::vector_length(this, $src);
19814     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19815                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19816   %}
19817   ins_pipe( pipe_slow );
19818 %}
19819 
19820 
19821 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19822                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
        // Float min/max reduction of vectors with 4 or more elements on CPUs
        // without AVX10.2, accumulating form: dst is both the scalar input and the
        // result. Reserves five vector temporaries and kills the flags register.
19823   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19824             Matcher::vector_length(n->in(2)) >= 4);
19825   match(Set dst (MinReductionV dst src));
19826   match(Set dst (MaxReductionV dst src));
19827   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19828   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19829   ins_encode %{
19830     assert(UseAVX > 0, "sanity");
19831 
          // Third argument is true in the accumulating rules (false in the
          // immediate-input rules).
19832     int opcode = this->ideal_Opcode();
19833     int vlen = Matcher::vector_length(this, $src);
19834     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19835                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19836   %}
19837   ins_pipe( pipe_slow );
19838 %}
19839 
19840 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
        // AVX10.2 variant of the 2-element float min/max reduction with an
        // immediate scalar input (+Inf for Min, -Inf for Max). Needs a single
        // vector temporary and does not kill the flags register.
19841   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19842             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19843              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19844             Matcher::vector_length(n->in(2)) == 2);
19845   match(Set dst (MinReductionV src1 src2));
19846   match(Set dst (MaxReductionV src1 src2));
19847   effect(TEMP dst, TEMP xtmp1);
19848   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19849   ins_encode %{
          // The first three temporary slots of the helper are filled with xnoreg;
          // only the last slot receives a real register.
19850     int opcode = this->ideal_Opcode();
19851     int vlen = Matcher::vector_length(this, $src2);
19852     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19853                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19854   %}
19855   ins_pipe( pipe_slow );
19856 %}
19857 
19858 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
        // AVX10.2 variant of the float min/max reduction for vectors with 4 or more
        // elements and an immediate scalar input (+Inf for Min, -Inf for Max).
        // Needs two vector temporaries and does not kill the flags register.
19859   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19860             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19861              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19862             Matcher::vector_length(n->in(2)) >= 4);
19863   match(Set dst (MinReductionV src1 src2));
19864   match(Set dst (MaxReductionV src1 src2));
19865   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19866   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19867   ins_encode %{
          // The first three temporary slots of the helper are filled with xnoreg;
          // the two real temporaries go in the trailing slots.
19868     int opcode = this->ideal_Opcode();
19869     int vlen = Matcher::vector_length(this, $src2);
19870     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19871                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19872   %}
19873   ins_pipe( pipe_slow );
19874 %}
19875 
19876 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
        // AVX10.2 variant of the accumulating 2-element float min/max reduction:
        // dst is both the scalar input and the result. One vector temporary.
19877   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19878             Matcher::vector_length(n->in(2)) == 2);
19879   match(Set dst (MinReductionV dst src));
19880   match(Set dst (MaxReductionV dst src));
19881   effect(TEMP dst, TEMP xtmp1);
19882   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19883   ins_encode %{
          // Third argument is true in the accumulating rules; unused temporary
          // slots of the helper are filled with xnoreg.
19884     int opcode = this->ideal_Opcode();
19885     int vlen = Matcher::vector_length(this, $src);
19886     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19887                          $xtmp1$$XMMRegister);
19888   %}
19889   ins_pipe( pipe_slow );
19890 %}
19891 
19892 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
        // AVX10.2 variant of the accumulating float min/max reduction for vectors
        // with 4 or more elements: dst is both the scalar input and the result.
        // Needs two vector temporaries and does not kill the flags register.
19893   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19894             Matcher::vector_length(n->in(2)) >= 4);
19895   match(Set dst (MinReductionV dst src));
19896   match(Set dst (MaxReductionV dst src));
19897   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
        // The label omits the "2": this rule handles lengths >= 4, not the
        // 2-element case.
19898   format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19899   ins_encode %{
          // Third argument is true in the accumulating rules; unused temporary
          // slots of the helper are filled with xnoreg.
19900     int opcode = this->ideal_Opcode();
19901     int vlen = Matcher::vector_length(this, $src);
19902     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19903                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19904   %}
19905   ins_pipe( pipe_slow );
19906 %}
19907 
19908 //--------------------Min/Max Double Reduction --------------------
19909 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19910                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
        // Double min/max reduction of a 2-element vector on CPUs without AVX10.2,
        // for the case where the scalar input is an immediate: +Inf for Min, -Inf
        // for Max. The immediate src1 is matched but not passed to the encoder.
        // Reserves four vector temporaries and kills the flags register.
19911   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19912             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19913              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19914             Matcher::vector_length(n->in(2)) == 2);
19915   match(Set dst (MinReductionV src1 src2));
19916   match(Set dst (MaxReductionV src1 src2));
19917   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19918   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19919   ins_encode %{
19920     assert(UseAVX > 0, "sanity");
19921 
          // NOTE(review): the third argument is false here and true in the *_av
          // rules -- presumably it says whether dst holds a live input; confirm
          // against reduceDoubleMinMax.
19922     int opcode = this->ideal_Opcode();
19923     int vlen = Matcher::vector_length(this, $src2);
19924     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19925                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19926   %}
19927   ins_pipe( pipe_slow );
19928 %}
19929 
19930 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19931                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
        // Double min/max reduction of vectors with 4 or more elements on CPUs
        // without AVX10.2, for the case where the scalar input is an immediate:
        // +Inf for Min, -Inf for Max. The immediate src1 is matched but not passed
        // to the encoder. Reserves five vector temporaries and kills the flags.
19932   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19933             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19934              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19935             Matcher::vector_length(n->in(2)) >= 4);
19936   match(Set dst (MinReductionV src1 src2));
19937   match(Set dst (MaxReductionV src1 src2));
19938   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19939   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19940   ins_encode %{
19941     assert(UseAVX > 0, "sanity");
19942 
          // Same helper as the 2-element rule, with one extra temporary (tmp5).
19943     int opcode = this->ideal_Opcode();
19944     int vlen = Matcher::vector_length(this, $src2);
19945     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19946                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19947   %}
19948   ins_pipe( pipe_slow );
19949 %}
19950 
19951 
19952 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19953                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
        // Double min/max reduction of a 2-element vector on CPUs without AVX10.2,
        // accumulating form: dst is both the scalar input and the result, and the
        // predicate places no condition on its value.
19954   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19955             Matcher::vector_length(n->in(2)) == 2);
19956   match(Set dst (MinReductionV dst src));
19957   match(Set dst (MaxReductionV dst src));
19958   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19959   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19960   ins_encode %{
19961     assert(UseAVX > 0, "sanity");
19962 
          // Third argument is true in the accumulating rules (false in the
          // immediate-input rules).
19963     int opcode = this->ideal_Opcode();
19964     int vlen = Matcher::vector_length(this, $src);
19965     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19966                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19967   %}
19968   ins_pipe( pipe_slow );
19969 %}
19970 
19971 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19972                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
        // Double min/max reduction of vectors with 4 or more elements on CPUs
        // without AVX10.2, accumulating form: dst is both the scalar input and the
        // result. Reserves five vector temporaries and kills the flags register.
19973   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19974             Matcher::vector_length(n->in(2)) >= 4);
19975   match(Set dst (MinReductionV dst src));
19976   match(Set dst (MaxReductionV dst src));
19977   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19978   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19979   ins_encode %{
19980     assert(UseAVX > 0, "sanity");
19981 
          // Third argument is true in the accumulating rules (false in the
          // immediate-input rules).
19982     int opcode = this->ideal_Opcode();
19983     int vlen = Matcher::vector_length(this, $src);
19984     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19985                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19986   %}
19987   ins_pipe( pipe_slow );
19988 %}
19989 
19990 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
        // AVX10.2 variant of the 2-element double min/max reduction with an
        // immediate scalar input (+Inf for Min, -Inf for Max). Needs a single
        // vector temporary and does not kill the flags register.
19991   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19992             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19993              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19994             Matcher::vector_length(n->in(2)) == 2);
19995   match(Set dst (MinReductionV src1 src2));
19996   match(Set dst (MaxReductionV src1 src2));
19997   effect(TEMP dst, TEMP xtmp1);
19998   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19999   ins_encode %{
          // The first three temporary slots of the helper are filled with xnoreg;
          // only the last slot receives a real register.
20000     int opcode = this->ideal_Opcode();
20001     int vlen = Matcher::vector_length(this, $src2);
20002     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20003                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20004   %}
20005   ins_pipe( pipe_slow );
20006 %}
20007 
20008 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
        // AVX10.2 variant of the double min/max reduction for vectors with 4 or
        // more elements and an immediate scalar input (+Inf for Min, -Inf for Max).
        // Needs two vector temporaries and does not kill the flags register.
20009   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20010             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20011              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20012             Matcher::vector_length(n->in(2)) >= 4);
20013   match(Set dst (MinReductionV src1 src2));
20014   match(Set dst (MaxReductionV src1 src2));
20015   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20016   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20017   ins_encode %{
          // The first three temporary slots of the helper are filled with xnoreg;
          // the two real temporaries go in the trailing slots.
20018     int opcode = this->ideal_Opcode();
20019     int vlen = Matcher::vector_length(this, $src2);
20020     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20021                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20022   %}
20023   ins_pipe( pipe_slow );
20024 %}
20025 
20026 
20027 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
        // AVX10.2 variant of the accumulating 2-element double min/max reduction:
        // dst is both the scalar input and the result. One vector temporary.
20028   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20029             Matcher::vector_length(n->in(2)) == 2);
20030   match(Set dst (MinReductionV dst src));
20031   match(Set dst (MaxReductionV dst src));
20032   effect(TEMP dst, TEMP xtmp1);
20033   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20034   ins_encode %{
          // Third argument is true in the accumulating rules; unused temporary
          // slots of the helper are filled with xnoreg.
20035     int opcode = this->ideal_Opcode();
20036     int vlen = Matcher::vector_length(this, $src);
20037     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20038                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20039   %}
20040   ins_pipe( pipe_slow );
20041 %}
20042 
20043 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
        // AVX10.2 variant of the accumulating double min/max reduction for vectors
        // with 4 or more elements: dst is both the scalar input and the result.
        // Needs two vector temporaries and does not kill the flags register.
20044   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20045             Matcher::vector_length(n->in(2)) >= 4);
20046   match(Set dst (MinReductionV dst src));
20047   match(Set dst (MaxReductionV dst src));
20048   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20049   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20050   ins_encode %{
          // Third argument is true in the accumulating rules; unused temporary
          // slots of the helper are filled with xnoreg.
20051     int opcode = this->ideal_Opcode();
20052     int vlen = Matcher::vector_length(this, $src);
20053     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20054                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20055   %}
20056   ins_pipe( pipe_slow );
20057 %}
20058 
20059 // ====================VECTOR ARITHMETIC=======================================
20060 
20061 // --------------------------------- ADD --------------------------------------
20062 
20063 // Bytes vector add
20064 instruct vaddB(vec dst, vec src) %{
        // Packed byte add selected when UseAVX is 0. Two-operand form: dst is both
        // the first addend and the destination, as expressed by the match rule.
20065   predicate(UseAVX == 0);
20066   match(Set dst (AddVB dst src));
20067   format %{ "paddb   $dst,$src\t! add packedB" %}
20068   ins_encode %{
          // No vector-length argument is passed in this form.
20069     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20070   %}
20071   ins_pipe( pipe_slow );
20072 %}
20073 
20074 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
        // Packed byte add selected when UseAVX is greater than 0. Three-operand
        // form: dst is independent of both register sources.
20075   predicate(UseAVX > 0);
20076   match(Set dst (AddVB src1 src2));
20077   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20078   ins_encode %{
          // The length encoding is derived from this node and passed to vpaddb.
20079     int vlen_enc = vector_length_encoding(this);
20080     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20081   %}
20082   ins_pipe( pipe_slow );
20083 %}
20084 
// AVX rule for AddVB whose second input is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  match(Set dst (AddVB src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20096 
20097 // Shorts/Chars vector add
// SSE-only rule (UseAVX == 0) for AddVS. dst is both the first input and the
// result; emitted as a single paddw.
instruct vaddS(vec dst, vec src) %{
  match(Set dst (AddVS dst src));
  predicate(UseAVX == 0);
  format %{ "paddw   $dst,$src\t! add packedS" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ paddw(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20107 
// AVX rule (UseAVX > 0) for AddVS with separate result and input registers.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (AddVS src1 src2));
  predicate(UseAVX > 0);
  format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20118 
// AVX rule for AddVS whose second input is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  match(Set dst (AddVS src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20130 
20131 // Integers vector add
// SSE-only rule (UseAVX == 0) for AddVI. dst is both the first input and the
// result; emitted as a single paddd.
instruct vaddI(vec dst, vec src) %{
  match(Set dst (AddVI dst src));
  predicate(UseAVX == 0);
  format %{ "paddd   $dst,$src\t! add packedI" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ paddd(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20141 
// AVX rule (UseAVX > 0) for AddVI with separate result and input registers.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (AddVI src1 src2));
  predicate(UseAVX > 0);
  format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20152 
20153 
// AVX rule for AddVI whose second input is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  match(Set dst (AddVI src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20165 
20166 // Longs vector add
// SSE-only rule (UseAVX == 0) for AddVL. dst is both the first input and the
// result; emitted as a single paddq.
instruct vaddL(vec dst, vec src) %{
  match(Set dst (AddVL dst src));
  predicate(UseAVX == 0);
  format %{ "paddq   $dst,$src\t! add packedL" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ paddq(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20176 
// AVX rule (UseAVX > 0) for AddVL with separate result and input registers.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (AddVL src1 src2));
  predicate(UseAVX > 0);
  format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20187 
// AVX rule for AddVL whose second input is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  match(Set dst (AddVL src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20199 
20200 // Floats vector add
// SSE-only rule (UseAVX == 0) for AddVF. dst is both the first input and the
// result; emitted as a single addps.
instruct vaddF(vec dst, vec src) %{
  match(Set dst (AddVF dst src));
  predicate(UseAVX == 0);
  format %{ "addps   $dst,$src\t! add packedF" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ addps(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20210 
// AVX rule (UseAVX > 0) for AddVF with separate result and input registers.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (AddVF src1 src2));
  predicate(UseAVX > 0);
  format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20221 
// AVX rule for AddVF whose second input is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  match(Set dst (AddVF src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20233 
20234 // Doubles vector add
// SSE-only rule (UseAVX == 0) for AddVD. dst is both the first input and the
// result; emitted as a single addpd.
instruct vaddD(vec dst, vec src) %{
  match(Set dst (AddVD dst src));
  predicate(UseAVX == 0);
  format %{ "addpd   $dst,$src\t! add packedD" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ addpd(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20244 
// AVX rule (UseAVX > 0) for AddVD with separate result and input registers.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (AddVD src1 src2));
  predicate(UseAVX > 0);
  format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20255 
// AVX rule for AddVD whose second input is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  match(Set dst (AddVD src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20267 
20268 // --------------------------------- SUB --------------------------------------
20269 
20270 // Bytes vector sub
// SSE-only rule (UseAVX == 0) for SubVB. dst is the minuend and receives the
// result; emitted as a single psubb.
instruct vsubB(vec dst, vec src) %{
  match(Set dst (SubVB dst src));
  predicate(UseAVX == 0);
  format %{ "psubb   $dst,$src\t! sub packedB" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ psubb(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20280 
// AVX rule (UseAVX > 0) for SubVB with separate result and input registers.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (SubVB src1 src2));
  predicate(UseAVX > 0);
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20291 
// AVX rule for SubVB whose subtrahend is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  match(Set dst (SubVB src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20303 
20304 // Shorts/Chars vector sub
// SSE-only rule (UseAVX == 0) for SubVS. dst is the minuend and receives the
// result; emitted as a single psubw.
instruct vsubS(vec dst, vec src) %{
  match(Set dst (SubVS dst src));
  predicate(UseAVX == 0);
  format %{ "psubw   $dst,$src\t! sub packedS" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ psubw(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20314 
20315 
// AVX rule (UseAVX > 0) for SubVS with separate result and input registers.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (SubVS src1 src2));
  predicate(UseAVX > 0);
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20326 
// AVX rule for SubVS whose subtrahend is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  match(Set dst (SubVS src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20338 
20339 // Integers vector sub
// SSE-only rule (UseAVX == 0) for SubVI. dst is the minuend and receives the
// result; emitted as a single psubd.
instruct vsubI(vec dst, vec src) %{
  match(Set dst (SubVI dst src));
  predicate(UseAVX == 0);
  format %{ "psubd   $dst,$src\t! sub packedI" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ psubd(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20349 
// AVX rule (UseAVX > 0) for SubVI with separate result and input registers.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (SubVI src1 src2));
  predicate(UseAVX > 0);
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20360 
// AVX rule for SubVI whose subtrahend is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  match(Set dst (SubVI src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20372 
20373 // Longs vector sub
// SSE-only rule (UseAVX == 0) for SubVL. dst is the minuend and receives the
// result; emitted as a single psubq.
instruct vsubL(vec dst, vec src) %{
  match(Set dst (SubVL dst src));
  predicate(UseAVX == 0);
  format %{ "psubq   $dst,$src\t! sub packedL" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ psubq(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20383 
// AVX rule (UseAVX > 0) for SubVL with separate result and input registers.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (SubVL src1 src2));
  predicate(UseAVX > 0);
  format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20394 
20395 
// AVX rule for SubVL whose subtrahend is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  match(Set dst (SubVL src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20407 
20408 // Floats vector sub
// SSE-only rule (UseAVX == 0) for SubVF. dst is the minuend and receives the
// result; emitted as a single subps.
instruct vsubF(vec dst, vec src) %{
  match(Set dst (SubVF dst src));
  predicate(UseAVX == 0);
  format %{ "subps   $dst,$src\t! sub packedF" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ subps(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20418 
// AVX rule (UseAVX > 0) for SubVF with separate result and input registers.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (SubVF src1 src2));
  predicate(UseAVX > 0);
  format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20429 
// AVX rule for SubVF whose subtrahend is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  match(Set dst (SubVF src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20441 
20442 // Doubles vector sub
// SSE-only rule (UseAVX == 0) for SubVD. dst is the minuend and receives the
// result; emitted as a single subpd.
instruct vsubD(vec dst, vec src) %{
  match(Set dst (SubVD dst src));
  predicate(UseAVX == 0);
  format %{ "subpd   $dst,$src\t! sub packedD" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ subpd(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20452 
// AVX rule (UseAVX > 0) for SubVD with separate result and input registers.
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (SubVD src1 src2));
  predicate(UseAVX > 0);
  format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20463 
// AVX rule for SubVD whose subtrahend is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vsubD_mem(vec dst, vec src, memory mem) %{
  match(Set dst (SubVD src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20475 
20476 // --------------------------------- MUL --------------------------------------
20477 
20478 // Byte vector mul
// MulVB for vectors of at most 8 bytes. Both inputs are widened to words with
// pmovsxbw, multiplied as words, reduced to the low byte of each word and
// packed back into bytes. dst and xtmp are TEMPs.
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  match(Set dst (MulVB src1 src2));
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    // Widen both inputs from bytes to words.
    __ pmovsxbw(result, $src1$$XMMRegister);
    __ pmovsxbw(scratch, $src2$$XMMRegister);
    // Word multiply, then clear the upper byte of every word product.
    __ pmullw(result, scratch);
    __ psllw(result, 8);
    __ psrlw(result, 8);
    // Narrow the words back to bytes.
    __ packuswb(result, result);
  %}
  ins_pipe( pipe_slow );
%}
20495 
// SSE-only MulVB for vectors wider than 8 bytes. Odd-index and even-index
// bytes are multiplied separately as words and the two partial results are
// merged with por. dst and xtmp are TEMPs.
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  match(Set dst (MulVB src1 src2));
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    XMMRegister lhs     = $src1$$XMMRegister;
    XMMRegister rhs     = $src2$$XMMRegister;
    // Odd-index elements: shift them down, multiply, shift the product back up.
    __ movdqu(result, lhs);
    __ psrlw(result, 8);
    __ movdqu(scratch, rhs);
    __ psrlw(scratch, 8);
    __ pmullw(result, scratch);
    __ psllw(result, 8);
    // Even-index elements: multiply in place and keep the low byte of each word.
    __ movdqu(scratch, lhs);
    __ pmullw(scratch, rhs);
    __ psllw(scratch, 8);
    __ psrlw(scratch, 8);
    // Merge the odd and even halves.
    __ por(result, scratch);
  %}
  ins_pipe( pipe_slow );
%}
20520 
// AVX MulVB for vectors wider than 8 bytes. Same odd/even word-multiply scheme
// as the SSE rule, using three-operand forms and two TEMP registers; dst is
// written only by the final vpor.
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  match(Set dst (MulVB src1 src2));
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister even_part = $xtmp1$$XMMRegister;
    XMMRegister odd_part  = $xtmp2$$XMMRegister;
    XMMRegister lhs       = $src1$$XMMRegister;
    XMMRegister rhs       = $src2$$XMMRegister;
    // Odd-index elements
    __ vpsrlw(odd_part, lhs, 8, enc);
    __ vpsrlw(even_part, rhs, 8, enc);
    __ vpmullw(odd_part, even_part, odd_part, enc);
    __ vpsllw(odd_part, odd_part, 8, enc);
    // Even-index elements
    __ vpmullw(even_part, lhs, rhs, enc);
    __ vpsllw(even_part, even_part, 8, enc);
    __ vpsrlw(even_part, even_part, 8, enc);
    // Merge the odd and even halves.
    __ vpor($dst$$XMMRegister, even_part, odd_part, enc);
  %}
  ins_pipe( pipe_slow );
%}
20542 
20543 // Shorts/Chars vector mul
// SSE-only rule (UseAVX == 0) for MulVS. dst is both the first input and the
// result; emitted as a single pmullw.
instruct vmulS(vec dst, vec src) %{
  match(Set dst (MulVS dst src));
  predicate(UseAVX == 0);
  format %{ "pmullw  $dst,$src\t! mul packedS" %}
  ins_encode %{
    XMMRegister product = $dst$$XMMRegister;
    __ pmullw(product, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20553 
// AVX rule (UseAVX > 0) for MulVS with separate result and input registers.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVS src1 src2));
  predicate(UseAVX > 0);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20564 
// AVX rule for MulVS whose second input is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  match(Set dst (MulVS src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20576 
20577 // Integers vector mul
// SSE-only rule (UseAVX == 0) for MulVI. dst is both the first input and the
// result; emitted as a single pmulld. The encoding asserts UseSSE > 3.
instruct vmulI(vec dst, vec src) %{
  match(Set dst (MulVI dst src));
  predicate(UseAVX == 0);
  format %{ "pmulld  $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister product = $dst$$XMMRegister;
    __ pmulld(product, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20588 
// AVX rule (UseAVX > 0) for MulVI with separate result and input registers.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVI src1 src2));
  predicate(UseAVX > 0);
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20599 
// AVX rule for MulVI whose second input is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  match(Set dst (MulVI src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20611 
20612 // Longs vector mul
// MulVL emitted as a single evpmullq. Selected for 64-byte vectors when
// supports_avx512dq() holds, or for any vector length when
// supports_avx512vldq() holds. Cost 500.
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVL src1 src2));
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20627 
// MulVL with the second input folded from a LoadVector, emitted as evpmullq
// with a memory operand. Selected for 64-byte vectors when
// supports_avx512dq() holds, or for vectors wider than 8 bytes when
// supports_avx512vldq() holds. Cost 500.
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  match(Set dst (MulVL src (LoadVector mem)));
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  ins_cost(500);
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20643 
// SSE-only MulVL (UseAVX == 0), built from 32-bit multiplies: the two cross
// products are summed and shifted into the upper half, then the unsigned
// lo-lo product is added. dst and xtmp are TEMPs. Cost 500.
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  match(Set dst (MulVL src1 src2));
  predicate(UseAVX == 0);
  effect(TEMP dst, TEMP xtmp);
  ins_cost(500);
  format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    XMMRegister lhs     = $src1$$XMMRegister;
    XMMRegister rhs     = $src2$$XMMRegister;
    // Lo-hi cross products: only their low 32 bits matter.
    __ pshufd(scratch, rhs, 0xB1);
    __ pmulld(scratch, lhs);
    __ pshufd(result, scratch, 0xB1);
    __ paddd(result, scratch);
    __ psllq(result, 32);
    // Lo-lo products
    __ movdqu(scratch, lhs);
    __ pmuludq(scratch, rhs);
    __ paddq(result, scratch);
  %}
  ins_pipe( pipe_slow );
%}
20665 
// AVX MulVL fallback for when evpmullq is not usable: 64-byte vectors
// without supports_avx512dq(), or narrower vectors without
// supports_avx512vldq(). Uses the same 32-bit cross-product scheme as the SSE
// rule, with two TEMP registers; dst is written only by the final vpaddq.
// Cost 500.
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  match(Set dst (MulVL src1 src2));
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister tmp_a = $xtmp1$$XMMRegister;
    XMMRegister tmp_b = $xtmp2$$XMMRegister;
    XMMRegister lhs   = $src1$$XMMRegister;
    XMMRegister rhs   = $src2$$XMMRegister;
    // Lo-hi cross products: only their low 32 bits matter.
    __ vpshufd(tmp_a, rhs, 0xB1, enc);
    __ vpmulld(tmp_a, lhs, tmp_a, enc);
    __ vpshufd(tmp_b, tmp_a, 0xB1, enc);
    __ vpaddd(tmp_b, tmp_b, tmp_a, enc);
    __ vpsllq(tmp_b, tmp_b, 32, enc);
    // Lo-lo products
    __ vpmuludq(tmp_a, lhs, rhs, enc);
    __ vpaddq($dst$$XMMRegister, tmp_a, tmp_b, enc);
  %}
  ins_pipe( pipe_slow );
%}
20690 
// AVX MulVL special case: when the MulVL node reports has_uint_inputs(), a
// single vpmuludq is emitted. Its cost (100) is lower than the 500 used by the
// general MulVL rules in this section.
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVL src1 src2));
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20702 
// AVX MulVL special case: when the MulVL node reports has_int_inputs(), a
// single vpmuldq is emitted. Its cost (100) is lower than the 500 used by the
// general MulVL rules in this section.
instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVL src1 src2));
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20714 
20715 // Floats vector mul
// SSE-only rule (UseAVX == 0) for MulVF. dst is both the first input and the
// result; emitted as a single mulps.
instruct vmulF(vec dst, vec src) %{
  match(Set dst (MulVF dst src));
  predicate(UseAVX == 0);
  format %{ "mulps   $dst,$src\t! mul packedF" %}
  ins_encode %{
    XMMRegister product = $dst$$XMMRegister;
    __ mulps(product, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20725 
// AVX rule (UseAVX > 0) for MulVF with separate result and input registers.
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVF src1 src2));
  predicate(UseAVX > 0);
  format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20736 
// AVX rule for MulVF whose second input is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vmulF_mem(vec dst, vec src, memory mem) %{
  match(Set dst (MulVF src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20748 
20749 // Doubles vector mul
// SSE-only rule (UseAVX == 0) for MulVD. dst is both the first input and the
// result; emitted as a single mulpd.
instruct vmulD(vec dst, vec src) %{
  match(Set dst (MulVD dst src));
  predicate(UseAVX == 0);
  format %{ "mulpd   $dst,$src\t! mul packedD" %}
  ins_encode %{
    XMMRegister product = $dst$$XMMRegister;
    __ mulpd(product, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20759 
// AVX rule (UseAVX > 0) for MulVD with separate result and input registers.
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVD src1 src2));
  predicate(UseAVX > 0);
  format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20770 
// AVX rule for MulVD whose second input is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vmulD_mem(vec dst, vec src, memory mem) %{
  match(Set dst (MulVD src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20782 
20783 // --------------------------------- DIV --------------------------------------
20784 
20785 // Floats vector div
// SSE-only rule (UseAVX == 0) for DivVF. dst is the dividend and receives the
// result; emitted as a single divps.
instruct vdivF(vec dst, vec src) %{
  match(Set dst (DivVF dst src));
  predicate(UseAVX == 0);
  format %{ "divps   $dst,$src\t! div packedF" %}
  ins_encode %{
    XMMRegister dividend = $dst$$XMMRegister;
    __ divps(dividend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20795 
// AVX rule (UseAVX > 0) for DivVF with separate result and input registers.
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (DivVF src1 src2));
  predicate(UseAVX > 0);
  format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20806 
// AVX rule for DivVF whose divisor is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vdivF_mem(vec dst, vec src, memory mem) %{
  match(Set dst (DivVF src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20818 
20819 // Doubles vector div
// SSE-only rule (UseAVX == 0) for DivVD. dst is the dividend and receives the
// result; emitted as a single divpd.
instruct vdivD(vec dst, vec src) %{
  match(Set dst (DivVD dst src));
  predicate(UseAVX == 0);
  format %{ "divpd   $dst,$src\t! div packedD" %}
  ins_encode %{
    XMMRegister dividend = $dst$$XMMRegister;
    __ divpd(dividend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20829 
// AVX rule (UseAVX > 0) for DivVD with separate result and input registers.
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (DivVD src1 src2));
  predicate(UseAVX > 0);
  format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20840 
// AVX rule for DivVD whose divisor is a LoadVector folded in as a memory
// operand. Selected only when the vector on in(1) is wider than 8 bytes.
instruct vdivD_mem(vec dst, vec src, memory mem) %{
  match(Set dst (DivVD src (LoadVector mem)));
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
20852 
20853 // ------------------------------ MinMax ---------------------------------------
20854 
20855 // Byte, Short, Int vector Min/Max
// SSE-only (UseAVX == 0) MinV/MaxV for integral element types other than
// T_LONG. dst is both the first input and the result. The ideal opcode and
// the element type are handed to pminmax, so one rule covers both match
// rules.
instruct minmax_reg_sse(vec dst, vec src) %{
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX == 0);
  format %{ "vector_minmax  $dst,$src\t!  " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const int ideal_op = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    __ pminmax(ideal_op, lane_bt, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20871 
// AVX (UseAVX > 0) MinV/MaxV for integral element types other than T_LONG,
// with separate result and input registers. The ideal opcode and the element
// type are handed to vpminmax, so one rule covers both match rules.
instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX > 0);
  format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int enc = vector_length_encoding(this);
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);

    __ vpminmax(ideal_op, lane_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
20887 
20888 // Long vector Min/Max
// SSE-only (UseAVX == 0) MinV/MaxV for 16-byte T_LONG vectors. dst is an input
// and the result; tmp is of operand class rxmm0. Both dst and tmp are TEMPs.
// NOTE(review): the MaxV rule lists its inputs as (src dst) while the MinV
// rule uses (dst src); kept exactly as found, confirm that this is intended.
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  predicate(Matcher::vector_length_in_bytes(n) == 16 &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const int ideal_op = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    assert(lane_bt == T_LONG, "sanity");

    __ pminmax(ideal_op, lane_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20907 
// Long vector Min/Max for vectors of at most 32 bytes when AVX is enabled
// but AVX512VL is not available. Operands use the legVec class.
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(UseAVX > 0 && !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");
    int ideal_op = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);

    // dst is declared TEMP, so it is not allocated on top of src1 or src2.
    __ vpminmax(ideal_op, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20925 
// Long vector Min/Max on the EVEX path: selected for 64-byte vectors, or for
// any vector length when AVX512VL is available.
instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  // Fixed: the second source was written without its '$', so the listing
  // printed the literal text instead of the allocated register.
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    // The ideal opcode (MinV or MaxV) selects the operation inside vpminmax.
    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20944 
// Float/Double vector Min/Max
// AVX10.2 rule: a single macro-assembler call with no temporaries.
instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
  predicate(is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            VM_Version::supports_avx10_2());
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP  $dst, $a, $b" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int ideal_op = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    // k0 is passed in the opmask position of the helper.
    __ vminmax_fp_avx10_2(ideal_op, lane_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20960 
// Float/Double vector Min/Max
// Rule for vectors of at most 32 bytes when AVX is enabled and AVX10.2 is
// not available. Needs three vector temporaries.
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(UseAVX > 0 && !VM_Version::supports_avx10_2() &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    int ideal_op = this->ideal_Opcode();

    // Result and inputs first, then the three scratch registers.
    __ vminmax_fp(ideal_op, lane_bt,
                  $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                  vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20983 
// Float/Double vector Min/Max for 64-byte vectors when AVX10.2 is not
// available. Uses two vector temporaries and one opmask temporary.
instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  // The listing now names every TEMP from the effect list, including the
  // opmask register, which was previously left out.
  format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    // The ideal opcode (MinV or MaxV) selects the operation inside evminmax_fp.
    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21004 
21005 // ------------------------------ Unsigned vector Min/Max ----------------------
21006 
// Unsigned Min/Max, register operands. T_LONG lanes are accepted only when
// AVX512VL is available; every other element type is always accepted.
instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(Matcher::vector_element_basic_type(n) != T_LONG || VM_Version::supports_avx512vl());
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    int ideal_op = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    // The ideal opcode (UMinV or UMaxV) selects the operation inside vpuminmax.
    __ vpuminmax(ideal_op, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21021 
// Unsigned Min/Max with the second input folded in from a LoadVector.
// T_LONG lanes are accepted only when AVX512VL is available.
instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(Matcher::vector_element_basic_type(n) != T_LONG || VM_Version::supports_avx512vl());
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    int ideal_op = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    // Same helper as the register form, called with an Address operand.
    __ vpuminmax(ideal_op, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21036 
// Unsigned Min/Max for T_LONG lanes when AVX512VL is not available; the
// helper needs two vector temporaries.
instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  // Fixed: the temporaries were written without '$', so the listing showed
  // the literal names instead of the allocated registers.
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    // The ideal opcode (UMinV or UMaxV) selects the operation inside vpuminmaxq.
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21050 
// Masked unsigned Min/Max, register operands. dst is the first input as
// well as the destination.
instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean argument presumably requests merge-masking
    // (unselected lanes keep the old dst value) - confirm against evmasked_op.
    __ evmasked_op(ideal_op, lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21064 
// Masked unsigned Min/Max with the second input folded in from a
// LoadVector. dst is the first input as well as the destination.
instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean argument presumably requests merge-masking
    // (unselected lanes keep the old dst value) - confirm against evmasked_op.
    __ evmasked_op(ideal_op, lane_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21078 
21079 // --------------------------------- Signum/CopySign ---------------------------
21080 
// Scalar float signum. dst is both the input and the result; zero and one
// are supplied as register operands, and the flags register is killed.
instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    // The ideal opcode is forwarded to the helper shared with signumD_reg.
    __ signum_fp(this->ideal_Opcode(),
                 $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21091 
// Scalar double signum. dst is both the input and the result; zero and one
// are supplied as register operands, and the flags register is killed.
instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    // The ideal opcode is forwarded to the helper shared with signumF_reg.
    __ signum_fp(this->ideal_Opcode(),
                 $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21102 
// Vector signum (float and double) for vectors of at most 32 bytes when
// AVX512VL is not available. Uses one vector temporary.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int ideal_op = this->ideal_Opcode();
    // The ideal opcode distinguishes SignumVF from SignumVD.
    __ vector_signum_avx(ideal_op, $dst$$XMMRegister, $src$$XMMRegister,
                         $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21117 
// Vector signum (float and double) on the EVEX path: 64-byte vectors, or
// any length when AVX512VL is available. Uses one opmask temporary.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl());
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int ideal_op = this->ideal_Opcode();
    // The ideal opcode distinguishes SignumVF from SignumVD.
    __ vector_signum_evex(ideal_op, $dst$$XMMRegister, $src$$XMMRegister,
                          $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21132 
21133 // ---------------------------------------
// For copySign use 0xE4 as the imm8 truth table for vpternlog
21135 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21136 // C (xmm2) is set to 0x7FFFFFFF
21137 // Wherever xmm2 is 0, we want to pick from B (sign)
21138 // Wherever xmm2 is 1, we want to pick from A (src)
21139 //
21140 // A B C Result
21141 // 0 0 0 0
21142 // 0 0 1 0
21143 // 0 1 0 1
21144 // 0 1 1 0
21145 // 1 0 0 0
21146 // 1 0 1 1
21147 // 1 1 0 1
21148 // 1 1 1 1
21149 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
21151 // ---------------------------------------
21152 
// Scalar float copySign: dst supplies the magnitude and receives the
// result, src supplies the sign bit.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    // imm8 truth table: take the dst bit where the mask bit is 1 and the
    // src bit where the mask bit is 0.
    const int select_by_mask = 0xE4;

    // Build the mask (every bit except the sign bit) in a GPR and move it
    // into the vector temporary.
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, select_by_mask, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21164 
// Scalar double copySign: dst supplies the magnitude and receives the
// result, src supplies the sign bit. The immD operand is matched as part
// of the ideal subtree but is not used by the encoding.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    // imm8 truth table: take the dst bit where the mask bit is 1 and the
    // src bit where the mask bit is 0.
    const int select_by_mask = 0xE4;

    // Build the mask (every bit except the sign bit) in a GPR and move it
    // into the vector temporary.
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, select_by_mask, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21177 
21178 //----------------------------- CompressBits/ExpandBits ------------------------
21179 
// CompressBits on int values, mask in a register.
instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    // Parallel bit extract, 32-bit form.
    __ pextl($dst$$Register,
             $src$$Register,
             $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21189 
// ExpandBits on int values, mask in a register.
instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    // Parallel bit deposit, 32-bit form.
    __ pdepl($dst$$Register,
             $src$$Register,
             $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21199 
// CompressBits on int values with the mask folded in from a LoadI.
instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    // Parallel bit extract, 32-bit form, mask read from memory.
    __ pextl($dst$$Register,
             $src$$Register,
             $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21209 
// ExpandBits on int values with the mask folded in from a LoadI.
instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    // Parallel bit deposit, 32-bit form, mask read from memory.
    __ pdepl($dst$$Register,
             $src$$Register,
             $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21219 
21220 // --------------------------------- Sqrt --------------------------------------
21221 
// Packed float square root, register source. Requires AVX.
instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    // The vector length encoding is derived from this node.
    int vec_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21232 
// Packed float square root with the source folded in from a LoadVector.
// Only used when the loaded vector is wider than 8 bytes. Requires AVX.
instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    // The vector length encoding is derived from this node.
    int vec_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21244 
// Floating point vector sqrt
// Packed double square root, register source. Requires AVX.
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    // The vector length encoding is derived from this node.
    int vec_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21256 
// Packed double square root with the source folded in from a LoadVector.
// Only used when the loaded vector is wider than 8 bytes. Requires AVX.
instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    // The vector length encoding is derived from this node.
    int vec_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21268 
21269 // ------------------------------ Shift ---------------------------------------
21270 
// On x86 the same count vector serves left and right shifts, because only
// the lowest bits of the xmm register are used as the count. One rule
// therefore covers both LShiftCntV and RShiftCntV.
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl    $dst,$cnt\t! load shift count" %}
  ins_encode %{
    // Move the scalar count from the general register into the vector register.
    __ movdl($dst$$XMMRegister,
             $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21282 
// Byte vector shift
// Uniform-count shift of at most 8 byte lanes: extend bytes to words, shift
// as words, mask each word with the short-to-byte mask, then pack to bytes.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && Matcher::vector_length(n) <= 8);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int ideal_op = this->ideal_Opcode();
    // Only the logical right shift extends with the flag cleared.
    bool extend_signed = (ideal_op != Op_URShiftVB);

    // Extend into tmp and shift there.
    __ vextendbw(extend_signed, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(ideal_op, $tmp$$XMMRegister, $shift$$XMMRegister);
    // Load the mask into dst, AND it with the shifted words, pack to bytes.
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21303 
// Uniform-count shift of 16 byte lanes when UseAVX <= 1. The low and high
// eight bytes are extended, shifted and masked separately, then packed
// together into dst.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(UseAVX <= 1 &&
            Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int ideal_op = this->ideal_Opcode();
    // Only the logical right shift extends with the flag cleared.
    bool extend_signed = (ideal_op != Op_URShiftVB);

    // Low eight bytes: extend into tmp1 and shift.
    __ vextendbw(extend_signed, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(ideal_op, $tmp1$$XMMRegister, $shift$$XMMRegister);

    // High eight bytes: pshufd with 0xE brings the upper 64 bits of src
    // down into the low half of tmp2, then extend and shift as above.
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(extend_signed, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(ideal_op, $tmp2$$XMMRegister, $shift$$XMMRegister);

    // Mask both halves with the short-to-byte mask (loaded into dst) and
    // pack: low half from dst, high half from tmp2.
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21328 
// Uniform-count shift of 16 byte lanes when UseAVX > 1. All 16 bytes are
// extended into one 256-bit vector of words, shifted and masked there, and
// the two 128-bit halves are then packed back into 16 bytes.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    // Only the logical right shift extends with the flag cleared.
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    // Pull the upper 128 bits into dst, then pack low (tmp) and high (dst)
    // halves with a 128-bit pack. The length is now spelled with the named
    // constant instead of a bare 0.
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21349 
// Uniform-count shift of 32 byte lanes (requires UseAVX > 1). The low and
// high 16 bytes are extended to 256-bit word vectors, shifted and masked
// separately, packed, and finally reordered with vpermq.
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && Matcher::vector_length(n) == 32);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int ideal_op = this->ideal_Opcode();
    // Only the logical right shift extends with the flag cleared.
    bool extend_signed = (ideal_op != Op_URShiftVB);
    int vec_enc = Assembler::AVX_256bit;

    // tmp gets the extended upper 16 bytes, dst the extended lower 16 bytes.
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(extend_signed, $tmp$$XMMRegister, $tmp$$XMMRegister, vec_enc);
    __ vextendbw(extend_signed, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);

    // Shift both halves as words, then apply the short-to-byte mask.
    __ vshiftw(ideal_op, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vec_enc);
    __ vshiftw(ideal_op, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vec_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vec_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vec_enc, noreg);

    // The 256-bit pack works per 128-bit lane, so the quadwords come out
    // interleaved; vpermq with 0xD8 (order 0,2,1,3) restores element order.
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vec_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21374 
// Uniform-count shift of 64 byte lanes (requires UseAVX > 2). The low and
// high 32 bytes are extended to 512-bit word vectors, shifted and masked
// separately, packed, and finally permuted using the byte perm mask.
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(!n->as_ShiftV()->is_var_shift() && Matcher::vector_length(n) == 64);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int ideal_op = this->ideal_Opcode();
    // Only the logical right shift extends with the flag cleared.
    bool extend_signed = (ideal_op != Op_URShiftVB);
    int vec_enc = Assembler::AVX_512bit;

    // tmp1 gets the extended upper 32 bytes, tmp2 the extended lower 32 bytes.
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(extend_signed, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vec_enc);
    __ vextendbw(extend_signed, $tmp2$$XMMRegister, $src$$XMMRegister, vec_enc);

    // Shift both halves as words.
    __ vshiftw(ideal_op, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vec_enc);
    __ vshiftw(ideal_op, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vec_enc);

    // Load the short-to-byte mask into dst, broadcast its low dword across
    // the full vector, and mask both halves with it.
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vec_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vec_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vec_enc);

    // Pack to bytes, then permute quadwords with the indices loaded from
    // the byte perm mask table.
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vec_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vec_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21402 
// A logical right shift of a short vector produces an incorrect Java result
// for negative data, because Java code converts a short value to int with
// sign extension before the shift. Char vectors are fine since chars are
// unsigned values.
// Shorts/Chars vector shift (left, arithmetic right and logical right)
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      int vec_enc = vector_length_encoding(this);
      __ vshiftw(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
    } else {
      // Two-operand form: copy the source into dst with a move sized to
      // the vector, then shift dst in place.
      int vlen = Matcher::vector_length(this);
      switch (vlen) {
        case 2:
          __ movflt($dst$$XMMRegister, $src$$XMMRegister);
          break;
        case 4:
          __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
          break;
        default:
          assert (vlen == 8, "sanity");
          __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
          break;
      }
      __ vshiftw(ideal_op, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
21437 
// Integers vector shift (left, arithmetic right and logical right)
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      int vec_enc = vector_length_encoding(this);
      __ vshiftd(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
    } else {
      // Two-operand form: copy the source into dst with a move sized to
      // the vector, then shift dst in place.
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      }
      __ vshiftd(ideal_op, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
21465 
// Integers vector constant shift (left, arithmetic right and logical right)
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      int vec_enc = vector_length_encoding(this);
      __ vshiftd_imm(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vec_enc);
    } else {
      // Two-operand form: copy the source into dst with a move sized to
      // the vector, then shift dst in place by the immediate count.
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      }
      __ vshiftd_imm(ideal_op, $dst$$XMMRegister, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21491 
// Longs vector shift
// Covers left and logical right shifts only; the arithmetic right shift of
// longs is handled by separate rules.
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      int vec_enc = vector_length_encoding(this);
      __ vshiftq(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
    } else {
      // Two-operand form: only two-lane vectors reach this path.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(ideal_op, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
21512 
// Longs vector constant shift
// Covers left and logical right shifts by an immediate count.
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int ideal_op = this->ideal_Opcode();
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      int vec_enc = vector_length_encoding(this);
      __ vshiftq_imm(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vec_enc);
    } else {
      // Two-operand form: only two-lane vectors reach this path.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq_imm(ideal_op, $dst$$XMMRegister, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21531 
21532 // -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
// Used when UseAVX <= 2. The sequence builds the result from a logical
// right shift plus a fix-up using the shifted sign-mask constant:
// with m = mask >>> shift, result = ((src >>> shift) ^ m) - m.
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(UseAVX <= 2 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      // Two-operand 128-bit form: dst = src >>> shift.
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
      // tmp = mask >>> shift.
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
      // dst = (dst ^ tmp) - tmp.
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      // Same computation with the three-operand 256-bit forms.
      int vec_enc = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vec_enc);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vec_enc);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vec_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21561 
// Long vector arithmetic right shift when UseAVX > 2: a single evpsraq,
// with no temporaries.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(UseAVX > 2 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    // The vector length encoding is derived from this node.
    int vec_enc = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21572 
21573 // ------------------- Variable Shift -----------------------------
// Byte variable shift
// Per-lane (variable) shift of at most 8 byte lanes when AVX512BW is not
// available. The helper leaves a word result in dst, which is then packed
// down to bytes.
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
    // Pack the word result to bytes; the 128-bit length is now passed via
    // vlen_enc instead of a bare 0.
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21594 
// Per-lane (variable) shift of 16 byte lanes when AVX512BW is not
// available. The lower and upper eight bytes are shifted separately as
// words and the two word results are packed together into dst.
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // Every instruction below works on 128 bits; the bare 0 literals that
    // used to encode this are replaced by vlen_enc.
    int vlen_enc = Assembler::AVX_128bit;
    // Shift lower half and get word result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);

    // Shift upper half and get word result in vtmp1
    // (vpshufd with 0xE brings the upper 64 bits down into the low half).
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, vlen_enc);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, vlen_enc);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);

    // Merge and down convert the two word results to byte in dst
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21622 
// Variable shift (LShiftVB / RShiftVB / URShiftVB) of a 32-element byte vector
// when AVX512BW is not available. Each 128-bit half is processed as two 8-byte
// pieces (shift, then pack words back to bytes) and the two 128-bit results are
// merged into dst. dst and vtmp1..vtmp4 are declared TEMP.
21623 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21624   predicate(Matcher::vector_length(n) == 32 &&
21625             n->as_ShiftV()->is_var_shift() &&
21626             !VM_Version::supports_avx512bw());
21627   match(Set dst ( LShiftVB src shift));
21628   match(Set dst ( RShiftVB src shift));
21629   match(Set dst (URShiftVB src shift));
21630   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
        // The "! using ..." marker matches the format strings of the other byte variable-shift rules.
21631   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21632   ins_encode %{
21633     assert(UseAVX >= 2, "required");
21634 
21635     int opcode = this->ideal_Opcode();
21636     int vlen_enc = Assembler::AVX_128bit;
21637     // Process lower 128 bits and get result in dst
21638     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
          // vpshufd 0xE brings the upper 64 bits of the low lane down for the second piece.
21639     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21640     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21641     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21642     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21643 
21644     // Process higher 128 bits and get result in vtmp3
21645     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21646     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21647     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21648     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21649     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21650     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
          // The packed bytes of the high 128 bits end up in vtmp1.
21651     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21652 
21653     // Merge the two results in dst
21654     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21655   %}
21656   ins_pipe( pipe_slow );
21657 %}
21658 
// Variable shift (LShiftVB / RShiftVB / URShiftVB) of a byte vector with at
// most 32 elements when AVX512BW is available. The work is delegated to the
// evarshiftb macro-assembler helper. dst and vtmp are declared TEMP.
21659 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21660   predicate(Matcher::vector_length(n) <= 32 &&
21661             n->as_ShiftV()->is_var_shift() &&
21662             VM_Version::supports_avx512bw());
21663   match(Set dst ( LShiftVB src shift));
21664   match(Set dst ( RShiftVB src shift));
21665   match(Set dst (URShiftVB src shift));
21666   effect(TEMP dst, TEMP vtmp);
21667   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21668   ins_encode %{
21669     assert(UseAVX > 2, "required");
21670 
21671     int opcode = this->ideal_Opcode();
          // Unlike the non-BW rules, the length encoding follows the node's vector size.
21672     int vlen_enc = vector_length_encoding(this);
21673     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21674   %}
21675   ins_pipe( pipe_slow );
21676 %}
21677 
// Variable shift (LShiftVB / RShiftVB / URShiftVB) of a 64-element byte vector
// when AVX512BW is available. The vector is handled as two 256-bit halves:
// each half goes through evarshiftb and the high result is inserted into dst.
// dst, vtmp1 and vtmp2 are declared TEMP.
21678 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21679   predicate(Matcher::vector_length(n) == 64 &&
21680             n->as_ShiftV()->is_var_shift() &&
21681             VM_Version::supports_avx512bw());
21682   match(Set dst ( LShiftVB src shift));
21683   match(Set dst ( RShiftVB src shift));
21684   match(Set dst (URShiftVB src shift));
21685   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21686   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21687   ins_encode %{
21688     assert(UseAVX > 2, "required");
21689 
21690     int opcode = this->ideal_Opcode();
21691     int vlen_enc = Assembler::AVX_256bit;
          // Low 256 bits: result goes to dst.
21692     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
          // High 256 bits of src and shift are extracted into the temps.
21693     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21694     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
          // vtmp2 is passed both as the shift input and as the helper's temp argument.
21695     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
          // Insert the high-half result at position 1 of dst.
21696     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21697   %}
21698   ins_pipe( pipe_slow );
21699 %}
21700 
21701 // Short variable shift
// Variable shift (LShiftVS / RShiftVS / URShiftVS) of a short vector with at
// most 8 elements when AVX512BW is not available. The shorts are widened to
// ints, shifted with the int variable shift, masked, and packed back to shorts.
// dst and vtmp are declared TEMP.
21702 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21703   predicate(Matcher::vector_length(n) <= 8 &&
21704             n->as_ShiftV()->is_var_shift() &&
21705             !VM_Version::supports_avx512bw());
21706   match(Set dst ( LShiftVS src shift));
21707   match(Set dst ( RShiftVS src shift));
21708   match(Set dst (URShiftVS src shift));
21709   effect(TEMP dst, TEMP vtmp);
        // The rule matches all three shift kinds, so the format does not say "left";
        // it also names the TEMP, like the byte variable-shift rules do.
21710   format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21711   ins_encode %{
21712     assert(UseAVX >= 2, "required");
21713 
21714     int opcode = this->ideal_Opcode();
          // Every opcode except the unsigned right shift requests the signed extension.
21715     bool sign = (opcode != Op_URShiftVS);
21716     int vlen_enc = Assembler::AVX_256bit;
          // NOTE(review): the literal 1 is passed as the length encoding here and on the
          // next line -- presumably the same value as AVX_256bit; confirm.
21717     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
          // Shift counts are always zero-extended.
21718     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21719     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
          // Mask the int lanes before the unsigned-saturating pack below.
21720     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
          // Pack the low and high 128-bit halves of the int result into shorts in dst.
21721     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21722     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21723   %}
21724   ins_pipe( pipe_slow );
21725 %}
21726 
// Variable shift (LShiftVS / RShiftVS / URShiftVS) of a 16-element short
// vector when AVX512BW is not available. Each 8-short half is widened to ints,
// shifted, and masked; the two halves are then packed back to shorts in dst.
// dst, vtmp1 and vtmp2 are declared TEMP.
21727 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21728   predicate(Matcher::vector_length(n) == 16 &&
21729             n->as_ShiftV()->is_var_shift() &&
21730             !VM_Version::supports_avx512bw());
21731   match(Set dst ( LShiftVS src shift));
21732   match(Set dst ( RShiftVS src shift));
21733   match(Set dst (URShiftVS src shift));
21734   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
        // The rule matches all three shift kinds, so the format does not say "left";
        // it also names the TEMPs, like the byte variable-shift rules do.
21735   format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21736   ins_encode %{
21737     assert(UseAVX >= 2, "required");
21738 
21739     int opcode = this->ideal_Opcode();
          // Every opcode except the unsigned right shift requests the signed extension.
21740     bool sign = (opcode != Op_URShiftVS);
21741     int vlen_enc = Assembler::AVX_256bit;
21742     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21743     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21744     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21745     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21746     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21747 
21748     // Shift upper half, with result in dst using vtmp1 as TEMP
21749     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21750     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21751     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21752     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21753     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21754     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21755 
21756     // Merge lower and upper half result into dst
21757     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
          // The 256-bit pack works per 128-bit lane, so the quadwords come out interleaved;
          // vpermq 0xD8 (quadword order 0,2,1,3) restores element order.
21758     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21759   %}
21760   ins_pipe( pipe_slow );
21761 %}
21762 
// Variable shift (LShiftVS / RShiftVS / URShiftVS) of a short vector when
// AVX512BW is available; delegated to the varshiftw helper. No TEMPs.
21763 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21764   predicate(n->as_ShiftV()->is_var_shift() &&
21765             VM_Version::supports_avx512bw());
21766   match(Set dst ( LShiftVS src shift));
21767   match(Set dst ( RShiftVS src shift));
21768   match(Set dst (URShiftVS src shift));
21769   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21770   ins_encode %{
21771     assert(UseAVX > 2, "required");
21772 
21773     int opcode = this->ideal_Opcode();
21774     int vlen_enc = vector_length_encoding(this);
          // Without AVX512VL the instruction is always emitted with the 512-bit encoding,
          // regardless of the node's vector size.
21775     if (!VM_Version::supports_avx512vl()) {
21776       vlen_enc = Assembler::AVX_512bit;
21777     }
21778     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21779   %}
21780   ins_pipe( pipe_slow );
21781 %}
21782 
21783 //Integer variable shift
// Variable shift (LShiftVI / RShiftVI / URShiftVI) of an int vector; delegated
// to the varshiftd helper, which receives the ideal opcode. No TEMPs.
21784 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21785   predicate(n->as_ShiftV()->is_var_shift());
21786   match(Set dst ( LShiftVI src shift));
21787   match(Set dst ( RShiftVI src shift));
21788   match(Set dst (URShiftVI src shift));
21789   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21790   ins_encode %{
          // The predicate does not test UseAVX; the requirement is only asserted here.
21791     assert(UseAVX >= 2, "required");
21792 
21793     int opcode = this->ideal_Opcode();
21794     int vlen_enc = vector_length_encoding(this);
21795     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21796   %}
21797   ins_pipe( pipe_slow );
21798 %}
21799 
21800 //Long variable shift
// Variable left shift and unsigned right shift (LShiftVL / URShiftVL) of a long
// vector; delegated to the varshiftq helper. RShiftVL is not matched here; it
// is matched by the vshiftL_arith_var* rules.
21801 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21802   predicate(n->as_ShiftV()->is_var_shift());
21803   match(Set dst ( LShiftVL src shift));
21804   match(Set dst (URShiftVL src shift));
21805   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21806   ins_encode %{
          // The predicate does not test UseAVX; the requirement is only asserted here.
21807     assert(UseAVX >= 2, "required");
21808 
21809     int opcode = this->ideal_Opcode();
21810     int vlen_enc = vector_length_encoding(this);
21811     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21812   %}
21813   ins_pipe( pipe_slow );
21814 %}
21815 
21816 //Long variable right shift arithmetic
// Variable right shift (RShiftVL) of a long vector with at most 4 elements,
// selected only when UseAVX == 2. Uses the varshiftq overload that takes an
// extra temp register. dst and vtmp are declared TEMP.
21817 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21818   predicate(Matcher::vector_length(n) <= 4 &&
21819             n->as_ShiftV()->is_var_shift() &&
21820             UseAVX == 2);
21821   match(Set dst (RShiftVL src shift));
21822   effect(TEMP dst, TEMP vtmp);
21823   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21824   ins_encode %{
21825     int opcode = this->ideal_Opcode();
21826     int vlen_enc = vector_length_encoding(this);
21827     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21828                  $vtmp$$XMMRegister);
21829   %}
21830   ins_pipe( pipe_slow );
21831 %}
21832 
// Variable right shift (RShiftVL) of a long vector, selected when UseAVX > 2.
// Delegated to the varshiftq helper without a temp register.
21833 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21834   predicate(n->as_ShiftV()->is_var_shift() &&
21835             UseAVX > 2);
21836   match(Set dst (RShiftVL src shift));
        // Same mnemonic text as the other long variable-shift rules (was misspelled "varfshift").
21837   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21838   ins_encode %{
21839     int opcode = this->ideal_Opcode();
21840     int vlen_enc = vector_length_encoding(this);
21841     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21842   %}
21843   ins_pipe( pipe_slow );
21844 %}
21845 
21846 // --------------------------------- AND --------------------------------------
21847 
// Vector AND for UseAVX == 0: two-operand form in which dst is both an input
// and the result (AndV dst src), emitted as pand.
21848 instruct vand(vec dst, vec src) %{
21849   predicate(UseAVX == 0);
21850   match(Set dst (AndV dst src));
21851   format %{ "pand    $dst,$src\t! and vectors" %}
21852   ins_encode %{
21853     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21854   %}
21855   ins_pipe( pipe_slow );
21856 %}
21857 
// Vector AND for UseAVX > 0: three-operand register form, emitted as vpand
// with the length encoding of this node.
21858 instruct vand_reg(vec dst, vec src1, vec src2) %{
21859   predicate(UseAVX > 0);
21860   match(Set dst (AndV src1 src2));
21861   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21862   ins_encode %{
21863     int vlen_enc = vector_length_encoding(this);
21864     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21865   %}
21866   ins_pipe( pipe_slow );
21867 %}
21868 
// Vector AND with the second operand folded from a LoadVector, for UseAVX > 0.
// Only used when the first input is wider than 8 bytes.
// NOTE(review): presumably the size limit avoids a memory operand that would
// read past a sub-16-byte vector -- confirm.
21869 instruct vand_mem(vec dst, vec src, memory mem) %{
21870   predicate((UseAVX > 0) &&
21871             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21872   match(Set dst (AndV src (LoadVector mem)));
21873   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21874   ins_encode %{
21875     int vlen_enc = vector_length_encoding(this);
21876     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21877   %}
21878   ins_pipe( pipe_slow );
21879 %}
21880 
21881 // --------------------------------- OR ---------------------------------------
21882 
// Vector OR for UseAVX == 0: two-operand form in which dst is both an input
// and the result (OrV dst src), emitted as por.
21883 instruct vor(vec dst, vec src) %{
21884   predicate(UseAVX == 0);
21885   match(Set dst (OrV dst src));
21886   format %{ "por     $dst,$src\t! or vectors" %}
21887   ins_encode %{
21888     __ por($dst$$XMMRegister, $src$$XMMRegister);
21889   %}
21890   ins_pipe( pipe_slow );
21891 %}
21892 
// Vector OR for UseAVX > 0: three-operand register form, emitted as vpor with
// the length encoding of this node.
21893 instruct vor_reg(vec dst, vec src1, vec src2) %{
21894   predicate(UseAVX > 0);
21895   match(Set dst (OrV src1 src2));
21896   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21897   ins_encode %{
21898     int vlen_enc = vector_length_encoding(this);
21899     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21900   %}
21901   ins_pipe( pipe_slow );
21902 %}
21903 
// Vector OR with the second operand folded from a LoadVector, for UseAVX > 0.
// Only used when the first input is wider than 8 bytes.
// NOTE(review): presumably the size limit avoids a memory operand that would
// read past a sub-16-byte vector -- confirm.
21904 instruct vor_mem(vec dst, vec src, memory mem) %{
21905   predicate((UseAVX > 0) &&
21906             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21907   match(Set dst (OrV src (LoadVector mem)));
21908   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21909   ins_encode %{
21910     int vlen_enc = vector_length_encoding(this);
21911     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21912   %}
21913   ins_pipe( pipe_slow );
21914 %}
21915 
21916 // --------------------------------- XOR --------------------------------------
21917 
// Vector XOR for UseAVX == 0: two-operand form in which dst is both an input
// and the result (XorV dst src), emitted as pxor.
21918 instruct vxor(vec dst, vec src) %{
21919   predicate(UseAVX == 0);
21920   match(Set dst (XorV dst src));
21921   format %{ "pxor    $dst,$src\t! xor vectors" %}
21922   ins_encode %{
21923     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21924   %}
21925   ins_pipe( pipe_slow );
21926 %}
21927 
// Vector XOR for UseAVX > 0: three-operand register form, emitted as vpxor
// with the length encoding of this node.
21928 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21929   predicate(UseAVX > 0);
21930   match(Set dst (XorV src1 src2));
21931   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21932   ins_encode %{
21933     int vlen_enc = vector_length_encoding(this);
21934     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21935   %}
21936   ins_pipe( pipe_slow );
21937 %}
21938 
// Vector XOR with the second operand folded from a LoadVector, for UseAVX > 0.
// Only used when the first input is wider than 8 bytes.
// NOTE(review): presumably the size limit avoids a memory operand that would
// read past a sub-16-byte vector -- confirm.
21939 instruct vxor_mem(vec dst, vec src, memory mem) %{
21940   predicate((UseAVX > 0) &&
21941             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21942   match(Set dst (XorV src (LoadVector mem)));
21943   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21944   ins_encode %{
21945     int vlen_enc = vector_length_encoding(this);
21946     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21947   %}
21948   ins_pipe( pipe_slow );
21949 %}
21950 
21951 // --------------------------------- VectorCast --------------------------------------
21952 
// Cast of a byte vector (VectorCastB2X) to the element type of this node.
// Used for every destination type when AVX512VL is supported, and for every
// destination type except T_DOUBLE otherwise (see vcastBtoD for that case).
21953 instruct vcastBtoX(vec dst, vec src) %{
21954   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21955   match(Set dst (VectorCastB2X src));
21956   format %{ "vector_cast_b2x $dst,$src\t!" %}
21957   ins_encode %{
          // The destination element type selects the conversion inside vconvert_b2x.
21958     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21959     int vlen_enc = vector_length_encoding(this);
21960     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21961   %}
21962   ins_pipe( pipe_slow );
21963 %}
21964 
// Cast of a byte vector to a double vector (VectorCastB2X with T_DOUBLE
// result) when AVX512VL is not supported. Operands use the legVec class
// instead of vec.
// NOTE(review): presumably legVec restricts allocation to registers that are
// encodable without EVEX -- confirm against the operand definition.
21965 instruct vcastBtoD(legVec dst, legVec src) %{
21966   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21967   match(Set dst (VectorCastB2X src));
21968   format %{ "vector_cast_b2x $dst,$src\t!" %}
21969   ins_encode %{
21970     int vlen_enc = vector_length_encoding(this);
21971     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21972   %}
21973   ins_pipe( pipe_slow );
21974 %}
21975 
// Narrowing cast short -> byte (VectorCastS2X with T_BYTE result) for sources
// of at most 8 shorts, used when UseAVX <= 2 or AVX512VL+BW is not supported.
21976 instruct castStoX(vec dst, vec src) %{
21977   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21978             Matcher::vector_length(n->in(1)) <= 8 && // src
21979             Matcher::vector_element_basic_type(n) == T_BYTE);
21980   match(Set dst (VectorCastS2X src));
21981   format %{ "vector_cast_s2x $dst,$src" %}
21982   ins_encode %{
21983     assert(UseAVX > 0, "required");
21984 
          // Mask first, then pack words to bytes. vpackuswb saturates, so the mask
          // (presumably keeping only the low byte of each short) makes the pack a truncation.
          // The literal 0 is the vector length encoding argument.
21985     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21986     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21987   %}
21988   ins_pipe( pipe_slow );
21989 %}
21990 
// Narrowing cast short -> byte (VectorCastS2X with T_BYTE result) for a source
// of exactly 16 shorts, used when UseAVX <= 2 or AVX512VL+BW is not supported.
// dst and vtmp are declared TEMP.
21991 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21992   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21993             Matcher::vector_length(n->in(1)) == 16 && // src
21994             Matcher::vector_element_basic_type(n) == T_BYTE);
21995   effect(TEMP dst, TEMP vtmp);
21996   match(Set dst (VectorCastS2X src));
21997   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21998   ins_encode %{
21999     assert(UseAVX > 0, "required");
22000 
          // The mask is applied at the source vector's length encoding.
22001     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22002     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
          // Split off the upper 128 bits, then pack low and high halves into bytes in dst.
22003     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22004     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22005   %}
22006   ins_pipe( pipe_slow );
22007 %}
22008 
// Cast of a short vector (VectorCastS2X) to byte, int, float, long or double.
// Selected when UseAVX > 2 with AVX512VL+BW, or whenever the destination vector
// is at least as large (in bytes) as the source vector.
22009 instruct vcastStoX_evex(vec dst, vec src) %{
22010   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22011             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22012   match(Set dst (VectorCastS2X src));
22013   format %{ "vector_cast_s2x $dst,$src\t!" %}
22014   ins_encode %{
22015     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // Narrowing uses the source length encoding; widening uses the destination's.
22016     int src_vlen_enc = vector_length_encoding(this, $src);
22017     int vlen_enc = vector_length_encoding(this);
22018     switch (to_elem_bt) {
22019       case T_BYTE:
              // evpmovwb is emitted with src_vlen_enc, so that is the encoding that must be
              // widened to 512-bit without AVX512VL (previously vlen_enc was adjusted, which
              // had no effect on the emitted instruction). Same pattern as vcastItoX_evex.
22020         if (!VM_Version::supports_avx512vl()) {
22021           src_vlen_enc = Assembler::AVX_512bit;
22022         }
22023         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22024         break;
22025       case T_INT:
22026         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22027         break;
22028       case T_FLOAT:
              // Sign-extend to int, then convert int to float in place.
22029         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22030         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22031         break;
22032       case T_LONG:
22033         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22034         break;
22035       case T_DOUBLE: {
              // The intermediate int vector is half the size of the double result,
              // so the sign extension uses the next smaller length encoding.
22036         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22037         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22038         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22039         break;
22040       }
22041       default:
22042         ShouldNotReachHere();
22043     }
22044   %}
22045   ins_pipe( pipe_slow );
22046 %}
22047 
// Narrowing cast int -> byte/short (VectorCastI2X with dst smaller than src)
// for sources of at most 16 bytes when UseAVX <= 2. Mask, then pack in place.
22048 instruct castItoX(vec dst, vec src) %{
22049   predicate(UseAVX <= 2 &&
22050             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22051             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22052   match(Set dst (VectorCastI2X src));
22053   format %{ "vector_cast_i2x $dst,$src" %}
22054   ins_encode %{
22055     assert(UseAVX > 0, "required");
22056 
22057     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // Length encoding is taken from the source operand.
22058     int vlen_enc = vector_length_encoding(this, $src);
22059 
22060     if (to_elem_bt == T_BYTE) {
            // Two packs: dword -> word, then word -> byte.
22061       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22062       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22063       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22064     } else {
22065       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
            // One pack: dword -> word.
22066       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22067       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22068     }
22069   %}
22070   ins_pipe( pipe_slow );
22071 %}
22072 
// Narrowing cast int -> byte/short (VectorCastI2X with dst smaller than src)
// for a 32-byte source when UseAVX <= 2. The masked source is split into its
// two 128-bit halves, which are packed together. dst and vtmp are declared TEMP.
22073 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22074   predicate(UseAVX <= 2 &&
22075             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22076             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22077   match(Set dst (VectorCastI2X src));
22078   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22079   effect(TEMP dst, TEMP vtmp);
22080   ins_encode %{
22081     assert(UseAVX > 0, "required");
22082 
22083     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // Length encoding is taken from the source operand.
22084     int vlen_enc = vector_length_encoding(this, $src);
22085 
22086     if (to_elem_bt == T_BYTE) {
            // vtmp = masked source; dst = its upper 128 bits; pack dword -> word,
            // then a 128-bit pack word -> byte.
22087       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22088       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22089       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22090       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22091     } else {
22092       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
            // Same split, with a single dword -> word pack.
22093       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22094       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22095       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22096     }
22097   %}
22098   ins_pipe( pipe_slow );
22099 %}
22100 
// Cast of an int vector (VectorCastI2X) to byte, short, float, long or double.
// Selected when UseAVX > 2, or whenever the destination vector is at least as
// large (in bytes) as the source vector.
22101 instruct vcastItoX_evex(vec dst, vec src) %{
22102   predicate(UseAVX > 2 ||
22103             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22104   match(Set dst (VectorCastI2X src));
22105   format %{ "vector_cast_i2x $dst,$src\t!" %}
22106   ins_encode %{
22107     assert(UseAVX > 0, "required");
22108 
22109     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
          // Narrowing moves use the source length encoding; the other conversions use the destination's.
22110     int src_vlen_enc = vector_length_encoding(this, $src);
22111     int dst_vlen_enc = vector_length_encoding(this);
22112     switch (dst_elem_bt) {
22113       case T_BYTE:
              // Without AVX512VL the down-convert is always emitted with the 512-bit encoding.
22114         if (!VM_Version::supports_avx512vl()) {
22115           src_vlen_enc = Assembler::AVX_512bit;
22116         }
22117         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22118         break;
22119       case T_SHORT:
              // Same 512-bit fallback as the T_BYTE case.
22120         if (!VM_Version::supports_avx512vl()) {
22121           src_vlen_enc = Assembler::AVX_512bit;
22122         }
22123         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22124         break;
22125       case T_FLOAT:
22126         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22127         break;
22128       case T_LONG:
22129         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22130         break;
22131       case T_DOUBLE:
22132         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22133         break;
22134       default:
22135         ShouldNotReachHere();
22136     }
22137   %}
22138   ins_pipe( pipe_slow );
22139 %}
22140 
// Narrowing cast long -> byte/short (VectorCastL2X) when UseAVX <= 2.
// The low dword of every long is first gathered into the low part of dst, then
// masked and packed down to words, and finally to bytes for T_BYTE.
22141 instruct vcastLtoBS(vec dst, vec src) %{
22142   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22143             UseAVX <= 2);
22144   match(Set dst (VectorCastL2X src));
22145   format %{ "vector_cast_l2x  $dst,$src" %}
22146   ins_encode %{
22147     assert(UseAVX > 0, "required");
22148 
          // Despite the name, vlen is the source vector size in bytes.
22149     int vlen = Matcher::vector_length_in_bytes(this, $src);
22150     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
          // The int-based masks are used because the longs have been reduced to dwords
          // by the time the mask is applied.
22151     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22152                                                       : ExternalAddress(vector_int_to_short_mask());
22153     if (vlen <= 16) {
            // Immediate 8 selects dwords 0 and 2 (the low halves of the two longs)
            // into the two lowest dword positions.
22154       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22155       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22156       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22157     } else {
22158       assert(vlen <= 32, "required");
            // Same dword selection within each 128-bit lane, then vpermpd 8 brings
            // quadwords 0 and 2 together in the low 128 bits.
22159       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22160       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22161       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22162       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22163     }
          // Bytes need one more pack: word -> byte.
22164     if (to_elem_bt == T_BYTE) {
22165       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22166     }
22167   %}
22168   ins_pipe( pipe_slow );
22169 %}
22170 
// Cast of a long vector (VectorCastL2X) to byte, short, int, float or double.
// Selected when UseAVX > 2 for any destination type, and at any AVX level for
// int, float and double destinations.
22171 instruct vcastLtoX_evex(vec dst, vec src) %{
22172   predicate(UseAVX > 2 ||
22173             (Matcher::vector_element_basic_type(n) == T_INT ||
22174              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22175              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22176   match(Set dst (VectorCastL2X src));
22177   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22178   ins_encode %{
22179     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // Both vlen (bytes) and vlen_enc describe the source operand.
22180     int vlen = Matcher::vector_length_in_bytes(this, $src);
22181     int vlen_enc = vector_length_encoding(this, $src);
22182     switch (to_elem_bt) {
22183       case T_BYTE:
              // Without AVX512VL the down-convert is emitted with the 512-bit encoding.
22184         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22185           vlen_enc = Assembler::AVX_512bit;
22186         }
22187         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22188         break;
22189       case T_SHORT:
22190         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22191           vlen_enc = Assembler::AVX_512bit;
22192         }
22193         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22194         break;
22195       case T_INT:
              // The strategy depends on the source size in bytes.
22196         if (vlen == 8) {
                // Single long: a plain register move, skipped when dst and src coincide.
22197           if ($dst$$XMMRegister != $src$$XMMRegister) {
22198             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22199           }
22200         } else if (vlen == 16) {
                // Immediate 8 selects dwords 0 and 2 into the two lowest dword positions.
22201           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22202         } else if (vlen == 32) {
22203           if (UseAVX > 2) {
22204             if (!VM_Version::supports_avx512vl()) {
22205               vlen_enc = Assembler::AVX_512bit;
22206             }
22207             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22208           } else {
                  // AVX/AVX2: per-lane dword selection followed by a cross-lane quadword permute.
22209             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22210             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22211           }
22212         } else { // vlen == 64
22213           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22214         }
22215         break;
22216       case T_FLOAT:
              // The predicate admits T_FLOAT/T_DOUBLE at any AVX level; the actual
              // requirement is only asserted here.
22217         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22218         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22219         break;
22220       case T_DOUBLE:
22221         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22222         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22223         break;
22224 
            // NOTE(review): the other cast rules in this section use ShouldNotReachHere()
            // in their default case; assert(false, ...) is presumably a no-op in builds
            // without asserts -- confirm this difference is intended.
22225       default: assert(false, "%s", type2name(to_elem_bt));
22226     }
22227   %}
22228   ins_pipe( pipe_slow );
22229 %}
22230 
// Cast of a float vector to a double vector (VectorCastF2X with T_DOUBLE
// result), emitted as a single vcvtps2pd at the destination length encoding.
22231 instruct vcastFtoD_reg(vec dst, vec src) %{
22232   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22233   match(Set dst (VectorCastF2X src));
22234   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22235   ins_encode %{
22236     int vlen_enc = vector_length_encoding(this);
22237     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22238   %}
22239   ins_pipe( pipe_slow );
22240 %}
22241 
22242 
// Cast of a float vector to an integral vector whose elements are at most
// 4 bytes wide (VectorCastF2X), for targets without AVX10.2 and without
// AVX512VL, and sources smaller than 64 bytes. Delegated to vector_castF2X_avx.
// dst and xtmp1..xtmp4 are declared TEMP; the flags register is killed.
22243 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22244   predicate(!VM_Version::supports_avx10_2() &&
22245             !VM_Version::supports_avx512vl() &&
22246             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22247             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22248             is_integral_type(Matcher::vector_element_basic_type(n)));
22249   match(Set dst (VectorCastF2X src));
22250   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22251   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22252   ins_encode %{
          // Length encoding is taken from the source operand.
22253     int vlen_enc = vector_length_encoding(this, $src);
22254     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22255     // JDK-8292878 removed the need for an explicit scratch register to load addresses
22256     // wider than 32 bits for register-indirect addressing, since stub constants
22257     // are part of the code cache and ReservedCodeCacheSize is currently capped at 2G.
22258     // Targets are free to raise this limit, but a code cache larger than 2G looks
22259     // unreasonable in practice. On the upside, with the given cap we save a
22260     // temporary register allocation, which in the limiting case can prevent
22261     // spilling in high register pressure blocks.
22262     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22263                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22264                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22265   %}
22266   ins_pipe( pipe_slow );
22267 %}
22268 
// Cast of a float vector to an integral vector (VectorCastF2X) for targets
// without AVX10.2 that either support AVX512VL or have a 64-byte source.
// dst, xtmp1, xtmp2, ktmp1 and ktmp2 are declared TEMP; the flags register is killed.
22269 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22270   predicate(!VM_Version::supports_avx10_2() &&
22271             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22272             is_integral_type(Matcher::vector_element_basic_type(n)));
22273   match(Set dst (VectorCastF2X src));
22274   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22275   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22276   ins_encode %{
22277     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22278     if (to_elem_bt == T_LONG) {
            // Float -> long: destination length encoding, dedicated helper and the
            // double sign-flip constant.
22279       int vlen_enc = vector_length_encoding(this);
22280       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22281                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22282                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22283     } else {
            // Other integral types: source length encoding and the float sign-flip constant.
22284       int vlen_enc = vector_length_encoding(this, $src);
22285       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22286                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22287                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22288     }
22289   %}
22290   ins_pipe( pipe_slow );
22291 %}
22292 
// VectorCastF2X to an integral element type, register source, AVX10.2 variant.
// Selected whenever AVX10.2 is supported; declares no temporaries.
instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const BasicType dst_bt = Matcher::vector_element_basic_type(this);
    // A long destination takes the length encoding of this node,
    // every other destination type takes it from the source operand.
    int enc;
    if (dst_bt == T_LONG) {
      enc = vector_length_encoding(this);
    } else {
      enc = vector_length_encoding(this, $src);
    }
    __ vector_castF2X_avx10_2(dst_bt, $dst$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22305 
// VectorCastF2X to an integral element type with the source folded in as a
// LoadVector memory operand, AVX10.2 variant.
instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X (LoadVector src)));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const BasicType dst_bt = Matcher::vector_element_basic_type(this);
    int lane_cnt = Matcher::vector_length(this);
    // A long destination takes the length encoding of this node. Otherwise the
    // encoding is derived from the byte size of lane_cnt float elements.
    int enc;
    if (dst_bt == T_LONG) {
      enc = vector_length_encoding(this);
    } else {
      enc = vector_length_encoding(lane_cnt * sizeof(jfloat));
    }
    __ vector_castF2X_avx10_2(dst_bt, $dst$$XMMRegister, $src$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
22319 
// VectorCastD2X whose result element type is T_FLOAT; emitted as vcvtpd2ps.
instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x  $dst,$src\t!" %}
  ins_encode %{
    // The length encoding is taken from the source operand.
    const int src_vlen_enc = vector_length_encoding(this, $src);
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ vcvtpd2ps(dst_xmm, src_xmm, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22330 
// VectorCastD2X to an integral element type, AVX variant.
// Selected when neither AVX10.2 nor AVX512VL is supported and the source vector
// is shorter than 64 bytes. Needs five vector temporaries; the flags register
// is declared as killed.
instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    const BasicType dst_bt = Matcher::vector_element_basic_type(this);
    // The length encoding is taken from the source operand.
    const int src_vlen_enc = vector_length_encoding(this, $src);
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    const XMMRegister t1 = $xtmp1$$XMMRegister;
    const XMMRegister t2 = $xtmp2$$XMMRegister;
    const XMMRegister t3 = $xtmp3$$XMMRegister;
    const XMMRegister t4 = $xtmp4$$XMMRegister;
    const XMMRegister t5 = $xtmp5$$XMMRegister;
    __ vector_castD2X_avx(dst_bt, dst_xmm, src_xmm, t1, t2, t3, t4, t5,
                          ExternalAddress(vector_float_signflip()), noreg, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22348 
// VectorCastD2X to an integral element type, EVEX variant.
// Selected when AVX10.2 is not supported and either AVX512VL is supported or
// the source vector is 64 bytes long. Needs two vector and two opmask
// temporaries; the flags register is declared as killed.
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const BasicType dst_bt = Matcher::vector_element_basic_type(this);
    const int src_vlen_enc = vector_length_encoding(this, $src);
    // With AVX512DQ the double sign-flip constant is passed, otherwise the float one.
    AddressLiteral flip_const = VM_Version::supports_avx512dq()
                                  ? ExternalAddress(vector_double_signflip())
                                  : ExternalAddress(vector_float_signflip());
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    const XMMRegister t1 = $xtmp1$$XMMRegister;
    const XMMRegister t2 = $xtmp2$$XMMRegister;
    __ vector_castD2X_evex(dst_bt, dst_xmm, src_xmm, t1, t2,
                           $ktmp1$$KRegister, $ktmp2$$KRegister, flip_const, noreg, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22366 
// VectorCastD2X to an integral element type, register source, AVX10.2 variant.
// Declares no temporaries.
instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const BasicType dst_bt = Matcher::vector_element_basic_type(this);
    // The length encoding is taken from the source operand.
    const int src_vlen_enc = vector_length_encoding(this, $src);
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ vector_castD2X_avx10_2(dst_bt, dst_xmm, src_xmm, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22379 
// VectorCastD2X to an integral element type with the source folded in as a
// LoadVector memory operand, AVX10.2 variant.
instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X (LoadVector src)));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    const BasicType dst_bt = Matcher::vector_element_basic_type(this);
    int lane_cnt = Matcher::vector_length(this);
    // The length encoding is derived from the byte size of lane_cnt double elements.
    const int mem_vlen_enc = vector_length_encoding(lane_cnt * sizeof(jdouble));
    __ vector_castD2X_avx10_2(dst_bt, $dst$$XMMRegister, $src$$Address, mem_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22393 
// Unsigned vector casts from byte, short or int elements (VectorUCastB2X,
// VectorUCastS2X, VectorUCastI2X). The encode block asserts UseAVX > 0.
instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // The length encoding and the target element type come from this node,
    // the source element type from the source operand.
    const int dst_vlen_enc = vector_length_encoding(this);
    const BasicType dst_bt = Matcher::vector_element_basic_type(this);
    const BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc, src_bt, dst_bt);
  %}
  ins_pipe( pipe_slow );
%}
22409 
// RoundVF producing T_INT elements, AVX variant.
// Selected when AVX512VL is not supported and the result vector is shorter than
// 64 bytes. Needs a pointer register temporary and four vector temporaries; the
// flags register is declared as killed.
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // The MXCSR value handed to the macro assembler is placed in the constant
    // table. Its value depends on EnableX86ECoreOpts.
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    const Register gpr_tmp = $tmp$$Register;
    const XMMRegister t1 = $xtmp1$$XMMRegister;
    const XMMRegister t2 = $xtmp2$$XMMRegister;
    const XMMRegister t3 = $xtmp3$$XMMRegister;
    const XMMRegister t4 = $xtmp4$$XMMRegister;
    __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, enc,
                              gpr_tmp, t1, t2, t3, t4);
  %}
  ins_pipe( pipe_slow );
%}
22426 
// RoundVF producing T_INT elements, EVEX variant.
// Selected when AVX512VL is supported or the result vector is 64 bytes long.
// Needs a pointer register temporary, two vector temporaries and two opmask
// temporaries; the flags register is declared as killed.
instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
             Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // The MXCSR value handed to the macro assembler is placed in the constant
    // table. Its value depends on EnableX86ECoreOpts.
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    const Register gpr_tmp = $tmp$$Register;
    const XMMRegister t1 = $xtmp1$$XMMRegister;
    const XMMRegister t2 = $xtmp2$$XMMRegister;
    const KRegister k1_tmp = $ktmp1$$KRegister;
    const KRegister k2_tmp = $ktmp2$$KRegister;
    __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, enc,
                               gpr_tmp, t1, t2, k1_tmp, k2_tmp);
  %}
  ins_pipe( pipe_slow );
%}
22443 
// RoundVD producing T_LONG elements, EVEX variant.
// Needs a pointer register temporary, two vector temporaries and two opmask
// temporaries; the flags register is declared as killed.
instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    // The MXCSR value handed to the macro assembler is placed in the constant
    // table. Its value depends on EnableX86ECoreOpts.
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    const Register gpr_tmp = $tmp$$Register;
    const XMMRegister t1 = $xtmp1$$XMMRegister;
    const XMMRegister t2 = $xtmp2$$XMMRegister;
    const KRegister k1_tmp = $ktmp1$$KRegister;
    const KRegister k2_tmp = $ktmp2$$KRegister;
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, enc,
                                gpr_tmp, t1, t2, k1_tmp, k2_tmp);
  %}
  ins_pipe( pipe_slow );
%}
22458 
22459 // --------------------------------- VectorMaskCmp --------------------------------------
22460 
// Floating point VectorMaskCmp producing a vector result, i.e. the node type is
// not a predicate mask type, for source vectors of 8 to 32 bytes.
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister lhs_xmm = $src1$$XMMRegister;
    const XMMRegister rhs_xmm = $src2$$XMMRegister;
    // Float sources use vcmpps, everything else takes the vcmppd path.
    if (Matcher::vector_element_basic_type(this, $src1) != T_FLOAT) {
      __ vcmppd(dst_xmm, lhs_xmm, rhs_xmm, fp_pred, src_vlen_enc);
    } else {
      __ vcmpps(dst_xmm, lhs_xmm, rhs_xmm, fp_pred, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22479 
// Floating point VectorMaskCmp on 64-byte sources producing a vector result.
// The compare writes an opmask temporary, which then drives a masked load of
// the all-bits-set constant into the destination.
instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_pvectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    const int enc512 = Assembler::AVX_512bit;
    const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
    // k0 is passed as the mask operand because the comparison itself is not masked.
    const KRegister no_mask = k0;
    const KRegister result_k = $ktmp$$KRegister;
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    if (Matcher::vector_element_basic_type(this, $src1) != T_FLOAT) {
      __ evcmppd(result_k, no_mask, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, enc512);
      __ evmovdquq(dst_xmm, result_k, ExternalAddress(vector_all_bits_set()), false, enc512, noreg);
    } else {
      __ evcmpps(result_k, no_mask, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, enc512);
      __ evmovdqul(dst_xmm, result_k, ExternalAddress(vector_all_bits_set()), false, enc512, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22501 
// Floating point VectorMaskCmp whose result is a predicate mask type; the
// compare writes directly into the destination opmask register.
instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    // k0 is passed as the mask operand because the comparison itself is not masked.
    const KRegister no_mask = k0;
    const KRegister dst_k = $dst$$KRegister;
    if (Matcher::vector_element_basic_type(this, $src1) != T_FLOAT) {
      __ evcmppd(dst_k, no_mask, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, src_vlen_enc);
    } else {
      __ evcmpps(dst_k, no_mask, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22520 
// Integral signed VectorMaskCmp for the eq, lt and gt conditions on sources of
// 4 to 32 bytes, producing a vector result. No temporary is needed, so xnoreg
// is passed in the temporary slot of vpcmpCCW.
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    const BasicType src_bt = Matcher::vector_element_basic_type(this, $src1);
    const Assembler::Width elem_width = widthForType(src_bt);
    const Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg,
                int_pred, elem_width, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22540 
// Integral signed VectorMaskCmp for the ne, le and ge conditions on sources of
// 4 to 32 bytes, producing a vector result. A vector temporary is handed to
// vpcmpCCW for these conditions.
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    const BasicType src_bt = Matcher::vector_element_basic_type(this, $src1);
    const Assembler::Width elem_width = widthForType(src_bt);
    const Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister,
                int_pred, elem_width, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22561 
// Integral unsigned VectorMaskCmp on sources of 4 to 32 bytes, producing a
// vector result. Both inputs are xor-ed with a broadcast high_bit_set constant
// for the element type before being handed to vpcmpCCW.
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
    const Assembler::Width elem_width = widthForType(Matcher::vector_element_basic_type(this, $src1));
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister flip_xmm = $xtmp$$XMMRegister;

    // Replicate the 64-bit flip constant across the temporary. The 128-bit
    // encoding uses vmovddup, the wider encodings use vbroadcastsd.
    if (src_vlen_enc != Assembler::AVX_128bit) {
      __ vbroadcastsd(flip_xmm, flip_bit, src_vlen_enc, noreg);
    } else {
      __ vmovddup(flip_xmm, flip_bit, src_vlen_enc, noreg);
    }
    // Flip the first input into dst and the second input into the temporary,
    // then compare the two flipped values.
    __ vpxor(dst_xmm, flip_xmm, $src1$$XMMRegister, src_vlen_enc);
    __ vpxor(flip_xmm, flip_xmm, $src2$$XMMRegister, src_vlen_enc);
    __ vpcmpCCW(dst_xmm, dst_xmm, flip_xmm, flip_xmm, int_pred, elem_width, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22588 
// Integral VectorMaskCmp on 64-byte sources producing a vector result.
// The compare writes an opmask temporary, which then drives a masked load of
// the all-bits-set constant into the destination. Only T_INT and T_LONG source
// elements are handled; any other type trips the assert.
instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const BasicType src_bt = Matcher::vector_element_basic_type(this, $src1);
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
    const bool signed_cmp = !Matcher::is_unsigned_booltest_pred($cond$$constant);
    // k0 is passed as the mask operand because the comparison itself is not masked.
    const KRegister no_mask = k0;
    const bool merge_flag = false;
    const KRegister result_k = $ktmp$$KRegister;
    const XMMRegister dst_xmm = $dst$$XMMRegister;

    if (src_bt == T_INT) {
      __ evpcmpd(result_k, no_mask, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, signed_cmp, src_vlen_enc);
      __ evmovdqul(dst_xmm, result_k, ExternalAddress(vector_all_bits_set()), merge_flag, src_vlen_enc, noreg);
    } else if (src_bt == T_LONG) {
      __ evpcmpq(result_k, no_mask, $src1$$XMMRegister, $src2$$XMMRegister, int_pred, signed_cmp, src_vlen_enc);
      __ evmovdquq(dst_xmm, result_k, ExternalAddress(vector_all_bits_set()), merge_flag, src_vlen_enc, noreg);
    } else {
      assert(false, "%s", type2name(src_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22622 
22623 
// Integral VectorMaskCmp whose result is a predicate mask type; the compare
// writes directly into the destination opmask register.
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");

    const BasicType src_bt = Matcher::vector_element_basic_type(this, $src1);
    const int src_vlen_enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
    const bool signed_cmp = !Matcher::is_unsigned_booltest_pred($cond$$constant);
    const KRegister dst_k = $dst$$KRegister;
    const XMMRegister lhs_xmm = $src1$$XMMRegister;
    const XMMRegister rhs_xmm = $src2$$XMMRegister;

    // Pick the compare instruction by source element type. k0 is passed as the
    // mask operand in every case.
    switch (src_bt) {
      case T_BYTE:
        __ evpcmpb(dst_k, k0, lhs_xmm, rhs_xmm, int_pred, signed_cmp, src_vlen_enc);
        break;
      case T_SHORT:
        __ evpcmpw(dst_k, k0, lhs_xmm, rhs_xmm, int_pred, signed_cmp, src_vlen_enc);
        break;
      case T_INT:
        __ evpcmpd(dst_k, k0, lhs_xmm, rhs_xmm, int_pred, signed_cmp, src_vlen_enc);
        break;
      case T_LONG:
        __ evpcmpq(dst_k, k0, lhs_xmm, rhs_xmm, int_pred, signed_cmp, src_vlen_enc);
        break;
      default:
        assert(false, "%s", type2name(src_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22661 
22662 // Extract
22663 
// ExtractI, ExtractS and ExtractB from a source vector of at most 16 bytes
// into a general purpose register.
instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    // The element type is read from the source operand.
    const BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    const Register dst_gpr = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ get_elem(src_bt, dst_gpr, src_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22678 
// ExtractI, ExtractS and ExtractB from a 32-byte or 64-byte source vector into
// a general purpose register. get_lane yields the register that holds the lane
// for the index, using the vector temporary, and get_elem reads from it.
instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    // The element type is read from the source operand.
    const BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    const XMMRegister scratch_xmm = $vtmp$$XMMRegister;
    XMMRegister picked_lane = __ get_lane(src_bt, scratch_xmm, $src$$XMMRegister, $idx$$constant);
    __ get_elem(src_bt, $dst$$Register, picked_lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22696 
// ExtractL from a source vector of at most two elements into a general purpose
// register. The encode block asserts UseSSE >= 4.
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    const Register dst_gpr = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ get_elem(T_LONG, dst_gpr, src_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22709 
// ExtractL from a source vector of four or eight elements into a general
// purpose register. get_lane yields the register that holds the lane for the
// index, using the vector temporary, and get_elem reads from it.
instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    const XMMRegister scratch_xmm = $vtmp$$XMMRegister;
    XMMRegister picked_lane = __ get_lane(T_LONG, scratch_xmm, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, picked_lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22724 
// ExtractF from a source vector of at most four elements into a float
// register. A vector temporary is passed on to get_elem.
instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    const XMMRegister scratch_xmm = $vtmp$$XMMRegister;
    __ get_elem(T_FLOAT, dst_xmm, src_xmm, $idx$$constant, scratch_xmm);
  %}
  ins_pipe( pipe_slow );
%}
22737 
// ExtractF from a source vector of eight or sixteen elements into a float
// register. get_lane yields the register that holds the lane for the index,
// using the vector temporary, and get_elem reads from it.
instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    const XMMRegister scratch_xmm = $vtmp$$XMMRegister;
    XMMRegister picked_lane = __ get_lane(T_FLOAT, scratch_xmm, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, picked_lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22752 
// ExtractD from a two-element source vector into a double register.
instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ get_elem(T_DOUBLE, dst_xmm, src_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22764 
// ExtractD from a source vector of four or eight elements into a double
// register. get_lane yields the register that holds the lane for the index,
// using the vector temporary, and get_elem reads from it.
instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    const XMMRegister scratch_xmm = $vtmp$$XMMRegister;
    XMMRegister picked_lane = __ get_lane(T_DOUBLE, scratch_xmm, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, picked_lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22779 
22780 // --------------------------------- Vector Blend --------------------------------------
22781 
// VectorBlend for UseAVX == 0; the encode block asserts UseSSE >= 4.
// pblendvb takes its mask implicitly from xmm0, so the mask is first copied
// into the rxmm0 temporary unless it already lives there. The destination
// doubles as the first blend input.
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const XMMRegister mask_xmm = $mask$$XMMRegister;
    const XMMRegister implicit_mask_xmm = $tmp$$XMMRegister;
    if (mask_xmm != implicit_mask_xmm) {
      __ movdqu(implicit_mask_xmm, mask_xmm);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}
22797 
// VectorBlend of integral elements with a vector mask for results of at most
// 32 bytes, when AVX is enabled and EnableX86ECoreOpts is off. Emitted as vpblendvb.
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister first_xmm = $src1$$XMMRegister;
    const XMMRegister second_xmm = $src2$$XMMRegister;
    const XMMRegister mask_xmm = $mask$$XMMRegister;
    __ vpblendvb(dst_xmm, first_xmm, second_xmm, mask_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
22811 
// VectorBlend of non-integral elements with a vector mask for results of at
// most 32 bytes, when AVX is enabled and EnableX86ECoreOpts is off. Emitted as vblendvps.
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister first_xmm = $src1$$XMMRegister;
    const XMMRegister second_xmm = $src2$$XMMRegister;
    const XMMRegister mask_xmm = $mask$$XMMRegister;
    __ vblendvps(dst_xmm, first_xmm, second_xmm, mask_xmm, enc);
  %}
  ins_pipe( pipe_slow );
%}
22825 
// VectorBlend with a vector mask for results of at most 32 bytes when AVX is
// enabled and EnableX86ECoreOpts is on. The blend is built from a
// vpandn / vpand / vpor sequence with one vector temporary.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister mask_xmm = $mask$$XMMRegister;
    const XMMRegister scratch_xmm = $vtmp$$XMMRegister;
    // The temporary receives vpandn of mask and first input, dst receives vpand
    // of mask and second input, and the two partial results are or-ed into dst.
    __ vpandn(scratch_xmm, mask_xmm, $src1$$XMMRegister, enc);
    __ vpand (dst_xmm,     mask_xmm, $src2$$XMMRegister, enc);
    __ vpor  (dst_xmm,     dst_xmm,  scratch_xmm,        enc);
  %}
  ins_pipe( pipe_slow );
%}
22841 
// VectorBlend on 64-byte results where the mask input is a vector, i.e. not a
// predicate mask type. The vector mask is first turned into an opmask by
// comparing it for equality against the all-bits-set constant; the opmask then
// drives evpblend with merging enabled.
// The format string names the allocator-chosen opmask temporary instead of a
// hard-coded register name.
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // k0 is passed as the mask operand of the compare itself.
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22856 
22857 
// VectorBlend where the mask input is already a predicate mask type, so the
// opmask operand drives evpblend directly with merging enabled. Subword
// element types are only accepted when AVX512BW is supported.
// The rule declares no TEMP, so the format string does not mention one.
instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22871 
22872 // --------------------------------- ABS --------------------------------------
22873 // a = |a|
// AbsVB: vectors of up to 16 byte elements use pabsb, longer vectors use
// vpabsb with the length encoding of this node.
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB  src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    if (lane_cnt > 16) {
      const int enc = vector_length_encoding(this);
      __ vpabsb(dst_xmm, src_xmm, enc);
    } else {
      __ pabsb(dst_xmm, src_xmm);
    }
  %}
  ins_pipe( pipe_slow );
%}
22888 
// Absolute value of a short vector. Vectors with more than 8 lanes are
// emitted as vpabsw with this node's length encoding; shorter ones use the
// two-operand pabsw.
instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS  src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    if (lane_cnt > 8) {
      const int enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, enc);
    } else {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22903 
// Absolute value of an int vector. Vectors with more than 4 lanes are
// emitted as vpabsd with this node's length encoding; shorter ones use the
// two-operand pabsd.
instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI  src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    if (lane_cnt > 4) {
      const int enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, enc);
    } else {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22918 
// Absolute value of a long vector via evpabsq (asserts UseAVX > 2).
// The node's own length encoding is used only when AVX512VL is supported;
// otherwise the instruction is emitted with the 512-bit encoding.
instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL  src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int enc = Assembler::AVX_512bit;
    if (VM_Version::supports_avx512vl()) {
      enc = vector_length_encoding(this);
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22932 
22933 // --------------------------------- ABSNEG --------------------------------------
22934 
// Float vector abs/neg for every lane count except 4. The ideal opcode
// (AbsVF or NegVF) is forwarded to vabsnegf. The 2-lane case uses the
// overload without a length encoding; 8 and 16 lanes pass this node's
// encoding.
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  ins_cost(150);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this);
    if (lane_cnt != 2) {
      assert(lane_cnt == 8 || lane_cnt == 16, "required");
      const int enc = vector_length_encoding(this);
      __ vabsnegf(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, enc);
    } else {
      __ vabsnegf(ideal_op, $dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22954 
// In-place float vector abs/neg for exactly 4 lanes: dst is both the input
// and the output, and the ideal opcode (AbsVF or NegVF) is forwarded to
// vabsnegf.
instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  ins_cost(150);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    XMMRegister inout = $dst$$XMMRegister;
    __ vabsnegf(ideal_op, inout, inout);
  %}
  ins_pipe( pipe_slow );
%}
22967 
// Double vector abs/neg. The ideal opcode (AbsVD or NegVD) is forwarded to
// vabsnegd. The 2-lane case uses the overload without a length encoding;
// every other lane count passes this node's encoding.
instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD  src));
  match(Set dst (NegVD  src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const uint lane_cnt = Matcher::vector_length(this);
    if (lane_cnt != 2) {
      const int enc = vector_length_encoding(this);
      __ vabsnegd(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, enc);
    } else {
      __ vabsnegd(ideal_op, $dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
22984 
22985 //------------------------------------- VectorTest --------------------------------------------
22986 
// VectorTest on vector-register operands whose first input is shorter than
// 16 bytes. Sets the flags through vectortest, which is given the TEMP
// vector vtmp as a scratch register.
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    // Element type and byte length are both taken from the first input.
    const int len_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
    __ vectortest(elem_bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, len_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
22999 
// VectorTest on vector-register operands whose first input is at least
// 16 bytes long. No scratch vector is supplied to vectortest (xnoreg).
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16  $src1, $src2\n\t" %}
  ins_encode %{
    // Element type and byte length are both taken from the first input.
    const int len_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
    __ vectortest(elem_bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, len_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
23011 
// VectorTest on opmask operands with the BoolTest::overflow predicate, for
// masks shorter than 8 lanes, or exactly 8 lanes without AVX512DQ.
// The mask is copied to the TEMP general register tmp, reduced to its low
// masklen bits, and compared against a value with all of those bits set.
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    const uint masklen = Matcher::vector_length(this, $src1);
    // One bit per lane, all set.
    const int lane_bits = (1 << masklen) - 1;
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, lane_bits);
    __ cmpl($tmp$$Register, lane_bits);
  %}
  ins_pipe( pipe_slow );
%}
23027 
// VectorTest on opmask operands with the BoolTest::ne predicate, for masks
// shorter than 8 lanes, or exactly 8 lanes without AVX512DQ.
// The mask is copied to the TEMP general register tmp and and-ed with the
// low masklen bits; the flags produced by that andl are the result.
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    const uint masklen = Matcher::vector_length(this, $src1);
    // One bit per lane, all set.
    const int lane_bits = (1 << masklen) - 1;
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, lane_bits);
  %}
  ins_pipe( pipe_slow );
%}
23042 
// VectorTest on opmask operands for masks of 16 or more lanes, or exactly
// 8 lanes when AVX512DQ is available. Emits kortest of src1 with itself,
// sized by the mask length.
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8  $src1, $src2\n\t" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this, $src1);
    KRegister mask_reg = $src1$$KRegister;
    __ kortest(lane_cnt, mask_reg, mask_reg);
  %}
  ins_pipe( pipe_slow );
%}
23054 
23055 //------------------------------------- LoadMask --------------------------------------------
23056 
// VectorLoadMask producing a mask in a vector register: matched when the
// result is not a predicate-mask type and AVX512VL+BW is not available.
// dst is also declared TEMP.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  effect(TEMP dst);
  ins_encode %{
    // Result element type and byte length come from this node.
    const BasicType dst_bt = Matcher::vector_element_basic_type(this);
    const int dst_bytes = Matcher::vector_length_in_bytes(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, dst_bytes, dst_bt, true);
  %}
  ins_pipe( pipe_slow );
%}
23069 
// VectorLoadMask producing a predicate mask in an opmask register when
// AVX512VL+BW is not available. Always emitted with the 512-bit encoding;
// xtmp is a TEMP vector handed to load_vector_mask.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  effect(TEMP xtmp);
  ins_encode %{
    KRegister   mask_out = $dst$$KRegister;
    XMMRegister mask_in  = $src$$XMMRegister;
    XMMRegister scratch  = $xtmp$$XMMRegister;
    __ load_vector_mask(mask_out, mask_in, scratch, true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
23081 
// VectorLoadMask producing a predicate mask in an opmask register when
// AVX512VL+BW is available. The length encoding is derived from the input
// node, and xtmp is a TEMP vector handed to load_vector_mask.
instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  effect(TEMP xtmp);
  ins_encode %{
    const int src_enc = vector_length_encoding(in(1));
    KRegister   mask_out = $dst$$KRegister;
    XMMRegister mask_in  = $src$$XMMRegister;
    XMMRegister scratch  = $xtmp$$XMMRegister;
    __ load_vector_mask(mask_out, mask_in, scratch, false, src_enc);
  %}
  ins_pipe( pipe_slow );
%}
23094 
23095 //------------------------------------- StoreMask --------------------------------------------
23096 
// VectorStoreMask for 1-byte elements held in a vector register (fewer than
// 64 lanes, input is not a predicate-mask type). The result is the byte-wise
// absolute value of the input. The SSE pabsb form is used for up to 16
// lanes when UseAVX <= 2; otherwise vpabsb with the encoding of src.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    if (lane_cnt > 16 || UseAVX > 2) {
      assert(UseAVX > 0, "required");
      const int src_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_enc);
    } else {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
23114 
// VectorStoreMask for 2-byte elements held in a vector register (at most 16
// lanes, input is not a predicate-mask type). The word lanes are packed down
// to bytes and made non-negative with an abs instruction.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    XMMRegister out_reg = $dst$$XMMRegister;
    XMMRegister in_reg  = $src$$XMMRegister;
    const int lane_cnt = Matcher::vector_length(this);
    if (lane_cnt > 8) {
      // 16 lanes: pack the low half of src with its extracted high half.
      assert(UseAVX > 0, "required");
      const int enc = Assembler::AVX_128bit;
      __ vextracti128(out_reg, in_reg, 0x1);
      __ vpacksswb(out_reg, in_reg, out_reg, enc);
      __ vpabsb(out_reg, out_reg, enc);
    } else {
      // Up to 8 lanes: abs on words, then pack against a zeroed xtmp.
      assert(UseSSE >= 3, "required");
      XMMRegister zero_reg = $xtmp$$XMMRegister;
      __ pxor(zero_reg, zero_reg);
      __ pabsw(out_reg, in_reg);
      __ packuswb(out_reg, zero_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
23137 
// VectorStoreMask for 4-byte elements held in a vector register when
// UseAVX <= 2 (at most 8 lanes, input is not a predicate-mask type).
// The dword lanes are packed down to bytes in two steps and made
// non-negative with an abs instruction; xtmp is zeroed and used as the
// second pack operand.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    XMMRegister out_reg  = $dst$$XMMRegister;
    XMMRegister in_reg   = $src$$XMMRegister;
    XMMRegister zero_reg = $xtmp$$XMMRegister;
    const int lane_cnt = Matcher::vector_length(this);
    if (lane_cnt > 4) {
      // 8 lanes: combine the low half of src with its extracted high half.
      assert(UseAVX > 0, "required");
      const int enc = Assembler::AVX_128bit;
      __ vpxor(zero_reg, zero_reg, zero_reg, enc);
      __ vextracti128(out_reg, in_reg, 0x1);
      __ vpackssdw(out_reg, in_reg, out_reg, enc);
      __ vpacksswb(out_reg, out_reg, zero_reg, enc);
      __ vpabsb(out_reg, out_reg, enc);
    } else {
      // Up to 4 lanes: abs on dwords, then pack dword->word->byte.
      assert(UseSSE >= 3, "required");
      __ pxor(zero_reg, zero_reg);
      __ pabsd(out_reg, in_reg);
      __ packusdw(out_reg, zero_reg);
      __ packuswb(out_reg, zero_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
23163 
// VectorStoreMask for 8-byte elements, 2 lanes, when UseAVX <= 2.
// A dword shuffle (immediate 0x8) gathers the inputs, followed by a dword
// abs and two packs against a zeroed xtmp.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    assert(UseSSE >= 3, "required");
    XMMRegister out_reg  = $dst$$XMMRegister;
    XMMRegister zero_reg = $xtmp$$XMMRegister;
    __ pxor(zero_reg, zero_reg);
    __ pshufd(out_reg, $src$$XMMRegister, 0x8);
    __ pabsd(out_reg, out_reg);
    // Narrow dword -> word -> byte.
    __ packusdw(out_reg, zero_reg);
    __ packuswb(out_reg, zero_reg);
  %}
  ins_pipe( pipe_slow );
%}
23179 
// VectorStoreMask for 8-byte elements, 4 lanes, when UseAVX <= 2.
// A 256-bit vshufps (0x88) and a 128-bit extract/blend gather the inputs
// into the low 128 bits of dst; they are then packed down to bytes against
// a zeroed vtmp and made non-negative with vpabsb.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP vtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  ins_encode %{
    const int enc128 = Assembler::AVX_128bit;
    XMMRegister out_reg = $dst$$XMMRegister;
    XMMRegister in_reg  = $src$$XMMRegister;
    XMMRegister tmp_reg = $vtmp$$XMMRegister;
    __ vshufps(out_reg, in_reg, in_reg, 0x88, Assembler::AVX_256bit);
    __ vextracti128(tmp_reg, out_reg, 0x1);
    __ vblendps(out_reg, out_reg, tmp_reg, 0xC, enc128);
    // tmp_reg is reused as the zero operand for the packs below.
    __ vpxor(tmp_reg, tmp_reg, tmp_reg, enc128);
    __ vpackssdw(out_reg, out_reg, tmp_reg, enc128);
    __ vpacksswb(out_reg, out_reg, tmp_reg, enc128);
    __ vpabsb(out_reg, out_reg, enc128);
  %}
  ins_pipe( pipe_slow );
%}
23197 
// VectorStoreMask for 4-byte elements held in a vector register when
// UseAVX > 2 (input is not a predicate-mask type). evpmovdb narrows the
// dword lanes to bytes, then vpabsb is applied to the byte result.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int dst_enc = vector_length_encoding(this);
    // Without AVX512VL the narrowing move uses the 512-bit encoding.
    int src_enc = Assembler::AVX_512bit;
    if (VM_Version::supports_avx512vl()) {
      src_enc = vector_length_encoding(this, $src);
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
23213 
// VectorStoreMask for 8-byte elements held in a vector register when
// UseAVX > 2 (input is not a predicate-mask type). evpmovqb narrows the
// qword lanes to bytes, then vpabsb is applied to the byte result.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int dst_enc = vector_length_encoding(this);
    // Without AVX512VL the narrowing move uses the 512-bit encoding.
    int src_enc = Assembler::AVX_512bit;
    if (VM_Version::supports_avx512vl()) {
      src_enc = vector_length_encoding(this, $src);
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_enc);
  %}
  ins_pipe( pipe_slow );
%}
23229 
// VectorStoreMask from a predicate mask in an opmask register when
// AVX512VL+BW is not available. The mask's vector length is asserted to be
// 64 bytes. A masked 512-bit load of the vector_int_mask_cmp_bits constant
// fills dst under the mask, and evpmovdb narrows the dword lanes to bytes.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst);
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    const int enc512 = Assembler::AVX_512bit;
    XMMRegister out_reg = $dst$$XMMRegister;
    __ evmovdqul(out_reg, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, enc512, noreg);
    __ evpmovdb(out_reg, out_reg, enc512);
  %}
  ins_pipe( pipe_slow );
%}
23243 
// VectorStoreMask from a predicate mask in an opmask register when
// AVX512VL+BW is available. evpmovm2b expands the mask into byte lanes and
// vpabsb is applied to the result, both with this node's length encoding.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst);
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister out_reg = $dst$$XMMRegister;
    __ evpmovm2b(out_reg, $mask$$KRegister, enc);
    __ vpabsb(out_reg, out_reg, enc);
  %}
  ins_pipe( pipe_slow );
%}
23256 
// VectorMaskCast on an opmask register, matched in place (dst is both input
// and output). Zero cost; no code is emitted.
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  format %{ "vector_mask_cast $dst" %}
  ins_cost(0);
  ins_encode %{
    // Intentionally emits nothing.
  %}
  ins_pipe(empty);
%}
23266 
// VectorMaskCast on a vector register when input and result have the same
// length in bytes. Matched in place (dst is both input and output), zero
// cost, and no code is emitted.
instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  format %{ "vector_mask_cast $dst" %}
  ins_cost(0);
  ins_encode %{
    // Intentionally emits nothing.
  %}
  ins_pipe(empty);
%}
23277 
// VectorMaskCast on a vector register when input and result differ in
// length in bytes. Delegates to vector_mask_cast with the destination and
// source element types and the result lane count.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    const BasicType to_bt   = Matcher::vector_element_basic_type(this);
    const BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
    const int lane_cnt = Matcher::vector_length(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, to_bt, from_bt, lane_cnt);
  %}
  ins_pipe(pipe_slow);
%}
23290 
23291 //-------------------------------- Load Iota Indices ----------------------------------
23292 
// VectorLoadConst with a zero immediate: loads the iota index constant for
// this node's element type and byte length into dst.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int len_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_iota_indices($dst$$XMMRegister, len_in_bytes, elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
23303 
// PopulateIndex with an int start value and a constant step of 1:
// broadcast src1 into vtmp, load the iota indices into dst, then add the
// two vectors lane-wise.
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    const int len_in_bytes = Matcher::vector_length_in_bytes(this);
    XMMRegister out_reg   = $dst$$XMMRegister;
    XMMRegister start_vec = $vtmp$$XMMRegister;
    __ vpbroadcast(lane_bt, start_vec, $src1$$Register, enc);
    __ load_iota_indices(out_reg, len_in_bytes, lane_bt);
    __ vpadd(lane_bt, out_reg, out_reg, start_vec, enc);
  %}
  ins_pipe( pipe_slow );
%}
23319 
// PopulateIndex with a long start value and a constant step of 1:
// broadcast src1 into vtmp, load the iota indices into dst, then add the
// two vectors lane-wise.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    const int len_in_bytes = Matcher::vector_length_in_bytes(this);
    XMMRegister out_reg   = $dst$$XMMRegister;
    XMMRegister start_vec = $vtmp$$XMMRegister;
    __ vpbroadcast(lane_bt, start_vec, $src1$$Register, enc);
    __ load_iota_indices(out_reg, len_in_bytes, lane_bt);
    __ vpadd(lane_bt, out_reg, out_reg, start_vec, enc);
  %}
  ins_pipe( pipe_slow );
%}
23335 
23336 //-------------------------------- Rearrange ----------------------------------
23337 
23338 // LoadShuffle/Rearrange for Byte
// In-place byte VectorRearrange for vectors with fewer than 32 lanes,
// emitted as a single pshufb of dst by the shuffle vector.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister data_reg = $dst$$XMMRegister;
    XMMRegister idx_reg  = $shuffle$$XMMRegister;
    __ pshufb(data_reg, idx_reg);
  %}
  ins_pipe( pipe_slow );
%}
23350 
// Byte VectorRearrange for 32-lane vectors without AVX512_VBMI.
// The byte shuffle is applied twice, once to src and once to a copy of src
// with its 128-bit halves swapped, and the two results are blended with a
// mask derived from the shuffle indices.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int enc256 = Assembler::AVX_256bit;
    XMMRegister out_reg     = $dst$$XMMRegister;
    XMMRegister in_reg      = $src$$XMMRegister;
    XMMRegister idx_reg     = $shuffle$$XMMRegister;
    XMMRegister swapped_reg = $vtmp1$$XMMRegister;
    XMMRegister blend_reg   = $vtmp2$$XMMRegister;
    // Lane-swapped copy of the source.
    __ vperm2i128(swapped_reg, in_reg, in_reg, 1);
    // Entries that come from the opposite 128-bit lane.
    __ vpshufb(swapped_reg, swapped_reg, idx_reg, enc256);
    // Entries that come from the same 128-bit lane.
    __ vpshufb(out_reg, in_reg, idx_reg, enc256);
    // Blend mask: shuffle indices plus the byte shuffle-mask constant, which
    // sets the high bit for entries taken from the other lane.
    __ vpaddb(blend_reg, idx_reg, ExternalAddress(vector_byte_shufflemask()), enc256, noreg);
    // Pick per byte between the two shuffled results.
    __ vpblendvb(out_reg, out_reg, swapped_reg, blend_reg, enc256);
  %}
  ins_pipe( pipe_slow );
%}
23372 
23373 
// Byte VectorRearrange for vectors with more than 32 lanes without
// AVX512_VBMI. Delegates to rearrange_bytes, which is given three TEMP
// vectors, a TEMP general register and a TEMP opmask register.
// The format string uses the same "\t! using ... as TEMP" layout as the
// other rearrange instructs.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23388 
// Byte VectorRearrange for vectors of 32 or more lanes when AVX512_VBMI is
// available: a single vpermb with this node's length encoding.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister idx_reg = $shuffle$$XMMRegister;
    __ vpermb($dst$$XMMRegister, idx_reg, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23400 
23401 // LoadShuffle/Rearrange for Short
23402 
// VectorLoadShuffle for short elements without AVX512BW. Only a byte
// shuffle instruction is available here, so every short index is turned
// into a pair of byte indices: double the index, copy it into both bytes of
// the word, then add the short shuffle-mask constant.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    XMMRegister out_reg = $dst$$XMMRegister;
    XMMRegister in_reg  = $src$$XMMRegister;
    XMMRegister tmp_reg = $vtmp$$XMMRegister;
    const int len_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX != 0) {
      assert(UseAVX > 1 || len_in_bytes <= 16, "required");
      const int enc = vector_length_encoding(this);
      // tmp = index * 2 (byte offset of the short)
      __ vpsllw(tmp_reg, in_reg, 1, enc);

      // out = byte offset replicated into both bytes of each word
      __ vpsllw(out_reg, tmp_reg,  8, enc);
      __ vpor(out_reg, out_reg, tmp_reg, enc);

      // Add the constant so the second byte addresses the following byte
      __ vpaddb(out_reg, out_reg, ExternalAddress(vector_short_shufflemask()), enc, noreg);
    } else {
      assert(len_in_bytes <= 16, "required");
      // tmp = index * 2 (byte offset of the short)
      __ movdqu(tmp_reg, in_reg);
      __ psllw(tmp_reg, 1);

      // out = byte offset replicated into both bytes of each word
      __ movdqu(out_reg, tmp_reg);
      __ psllw(out_reg, 8);
      __ por(out_reg, tmp_reg);

      // Add the constant so the second byte addresses the following byte
      __ movdqu(tmp_reg, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb(out_reg, tmp_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
23443 
// In-place short VectorRearrange for at most 8 lanes without AVX512BW,
// emitted as a single pshufb of dst by the shuffle vector.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister data_reg = $dst$$XMMRegister;
    XMMRegister idx_reg  = $shuffle$$XMMRegister;
    __ pshufb(data_reg, idx_reg);
  %}
  ins_pipe( pipe_slow );
%}
23455 
// Short VectorRearrange for 16-lane vectors without AVX512BW.
// The byte shuffle is applied twice, once to src and once to a copy of src
// with its 128-bit halves swapped, and the two results are blended with a
// mask derived from the shuffle indices.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int enc256 = Assembler::AVX_256bit;
    XMMRegister out_reg     = $dst$$XMMRegister;
    XMMRegister in_reg      = $src$$XMMRegister;
    XMMRegister idx_reg     = $shuffle$$XMMRegister;
    XMMRegister swapped_reg = $vtmp1$$XMMRegister;
    XMMRegister blend_reg   = $vtmp2$$XMMRegister;
    // Lane-swapped copy of the source.
    __ vperm2i128(swapped_reg, in_reg, in_reg, 1);
    // Entries that come from the opposite 128-bit lane.
    __ vpshufb(swapped_reg, swapped_reg, idx_reg, enc256);
    // Entries that come from the same 128-bit lane.
    __ vpshufb(out_reg, in_reg, idx_reg, enc256);
    // Blend mask: shuffle indices plus the byte shuffle-mask constant, which
    // sets the high bit for entries taken from the other lane.
    __ vpaddb(blend_reg, idx_reg, ExternalAddress(vector_byte_shufflemask()), enc256, noreg);
    // Pick per byte between the two shuffled results.
    __ vpblendvb(out_reg, out_reg, swapped_reg, blend_reg, enc256);
  %}
  ins_pipe( pipe_slow );
%}
23477 
// Short VectorRearrange when AVX512BW is available: a single vpermw.
// The node's own length encoding is used only when AVX512VL is supported;
// otherwise the 512-bit encoding is used.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int enc = Assembler::AVX_512bit;
    if (VM_Version::supports_avx512vl()) {
      enc = vector_length_encoding(this);
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23492 
23493 // LoadShuffle/Rearrange for Integer and Float
23494 
// VectorLoadShuffle for 4-lane int/float vectors when UseAVX == 0.
// Only a byte shuffle instruction is available here, so every int index is
// expanded into four byte indices: duplicate and scale the index by 4,
// replicate it into each byte of the dword, then add the int shuffle-mask
// constant.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    XMMRegister out_reg = $dst$$XMMRegister;
    XMMRegister tmp_reg = $vtmp$$XMMRegister;

    // Copy each index into both words of its dword, then multiply by 4
    __ movdqu(tmp_reg, $src$$XMMRegister);
    __ pshuflw(tmp_reg, tmp_reg, 0xA0);
    __ pshufhw(tmp_reg, tmp_reg, 0xA0);
    __ psllw(tmp_reg, 2);

    // Replicate into the upper byte of every word: four copies per dword
    __ movdqu(out_reg, tmp_reg);
    __ psllw(out_reg, 8);
    __ por(tmp_reg, out_reg);

    // Add the per-byte offsets from the int shuffle-mask constant
    __ movdqu(out_reg, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb(out_reg, tmp_reg);
  %}
  ins_pipe( pipe_slow );
%}
23524 
// In-place int/float VectorRearrange when UseAVX == 0, emitted as a single
// pshufb of dst by the shuffle vector.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister data_reg = $dst$$XMMRegister;
    XMMRegister idx_reg  = $shuffle$$XMMRegister;
    __ pshufb(data_reg, idx_reg);
  %}
  ins_pipe( pipe_slow );
%}
23536 
// Int/float VectorRearrange when UseAVX > 0. Delegates to
// vector_rearrange_int_float with this node's element type and length
// encoding.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    __ vector_rearrange_int_float(elem_bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
23549 
23550 // LoadShuffle/Rearrange for Long and Double
23551 
// VectorLoadShuffle for long/double vectors with fewer than 8 lanes and no
// AVX512VL. Only a double-word shuffle is available here, so every long
// index is turned into a pair of dword indices: double the index, copy it
// into both dwords of the qword, then add the long shuffle-mask constant.
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int enc = vector_length_encoding(this);
    XMMRegister out_reg = $dst$$XMMRegister;
    XMMRegister tmp_reg = $vtmp$$XMMRegister;

    // tmp = index * 2 (dword position of the long)
    __ vpsllq(tmp_reg, $src$$XMMRegister, 1, enc);

    // out = dword position replicated into both halves of each qword
    __ vpsllq(out_reg, tmp_reg, 32, enc);
    __ vpor(out_reg, out_reg, tmp_reg, enc);

    // Add the constant so the second dword addresses the following dword
    __ vpaddd(out_reg, out_reg, ExternalAddress(vector_long_shufflemask()), enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23577 
23578 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23579   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23580             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23581   match(Set dst (VectorRearrange src shuffle));
23582   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23583   ins_encode %{
23584     assert(UseAVX >= 2, "required");
23585 
23586     int vlen_enc = vector_length_encoding(this);
23587     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23588   %}
23589   ins_pipe( pipe_slow );
23590 %}
23591 
23592 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23593   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23594             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23595   match(Set dst (VectorRearrange src shuffle));
23596   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23597   ins_encode %{
23598     assert(UseAVX > 2, "required");
23599 
23600     int vlen_enc = vector_length_encoding(this);
23601     if (vlen_enc == Assembler::AVX_128bit) {
23602       vlen_enc = Assembler::AVX_256bit;
23603     }
23604     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23605   %}
23606   ins_pipe( pipe_slow );
23607 %}
23608 
23609 // --------------------------------- FMA --------------------------------------
23610 // a * b + c
23611 
23612 instruct vfmaF_reg(vec a, vec b, vec c) %{
23613   match(Set c (FmaVF  c (Binary a b)));
23614   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23615   ins_cost(150);
23616   ins_encode %{
23617     assert(UseFMA, "not enabled");
23618     int vlen_enc = vector_length_encoding(this);
23619     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23620   %}
23621   ins_pipe( pipe_slow );
23622 %}
23623 
23624 instruct vfmaF_mem(vec a, memory b, vec c) %{
23625   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23626   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23627   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23628   ins_cost(150);
23629   ins_encode %{
23630     assert(UseFMA, "not enabled");
23631     int vlen_enc = vector_length_encoding(this);
23632     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23633   %}
23634   ins_pipe( pipe_slow );
23635 %}
23636 
23637 instruct vfmaD_reg(vec a, vec b, vec c) %{
23638   match(Set c (FmaVD  c (Binary a b)));
23639   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23640   ins_cost(150);
23641   ins_encode %{
23642     assert(UseFMA, "not enabled");
23643     int vlen_enc = vector_length_encoding(this);
23644     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23645   %}
23646   ins_pipe( pipe_slow );
23647 %}
23648 
23649 instruct vfmaD_mem(vec a, memory b, vec c) %{
23650   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23651   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23652   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23653   ins_cost(150);
23654   ins_encode %{
23655     assert(UseFMA, "not enabled");
23656     int vlen_enc = vector_length_encoding(this);
23657     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23658   %}
23659   ins_pipe( pipe_slow );
23660 %}
23661 
23662 // --------------------------------- Vector Multiply Add --------------------------------------
23663 
// MulAddVS2VI (packed short multiply-add producing packed ints) for
// configurations without AVX (UseAVX == 0).
// Two-operand form: $dst is both the first input and the result, and a single
// pmaddwd is emitted.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23673 
23674 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23675   predicate(UseAVX > 0);
23676   match(Set dst (MulAddVS2VI src1 src2));
23677   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23678   ins_encode %{
23679     int vlen_enc = vector_length_encoding(this);
23680     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23681   %}
23682   ins_pipe( pipe_slow );
23683 %}
23684 
23685 // --------------------------------- Vector Multiply Add Add ----------------------------------
23686 
23687 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23688   predicate(VM_Version::supports_avx512_vnni());
23689   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23690   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23691   ins_encode %{
23692     assert(UseAVX > 2, "required");
23693     int vlen_enc = vector_length_encoding(this);
23694     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23695   %}
23696   ins_pipe( pipe_slow );
23697   ins_cost(10);
23698 %}
23699 
23700 // --------------------------------- PopCount --------------------------------------
23701 
23702 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23703   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23704   match(Set dst (PopCountVI src));
23705   match(Set dst (PopCountVL src));
23706   format %{ "vector_popcount_integral $dst, $src" %}
23707   ins_encode %{
23708     int opcode = this->ideal_Opcode();
23709     int vlen_enc = vector_length_encoding(this, $src);
23710     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23711     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23712   %}
23713   ins_pipe( pipe_slow );
23714 %}
23715 
23716 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23717   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23718   match(Set dst (PopCountVI src mask));
23719   match(Set dst (PopCountVL src mask));
23720   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23721   ins_encode %{
23722     int vlen_enc = vector_length_encoding(this, $src);
23723     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23724     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23725     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23726   %}
23727   ins_pipe( pipe_slow );
23728 %}
23729 
23730 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23731   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23732   match(Set dst (PopCountVI src));
23733   match(Set dst (PopCountVL src));
23734   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23735   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23736   ins_encode %{
23737     int opcode = this->ideal_Opcode();
23738     int vlen_enc = vector_length_encoding(this, $src);
23739     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23740     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23741                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23742   %}
23743   ins_pipe( pipe_slow );
23744 %}
23745 
23746 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23747 
23748 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23749   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23750                                               Matcher::vector_length_in_bytes(n->in(1))));
23751   match(Set dst (CountTrailingZerosV src));
23752   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23753   ins_cost(400);
23754   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
23755   ins_encode %{
23756     int vlen_enc = vector_length_encoding(this, $src);
23757     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23758     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23759                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23760   %}
23761   ins_pipe( pipe_slow );
23762 %}
23763 
23764 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23765   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23766             VM_Version::supports_avx512cd() &&
23767             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23768   match(Set dst (CountTrailingZerosV src));
23769   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23770   ins_cost(400);
23771   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23772   ins_encode %{
23773     int vlen_enc = vector_length_encoding(this, $src);
23774     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23775     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23776                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23777   %}
23778   ins_pipe( pipe_slow );
23779 %}
23780 
23781 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23782   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23783   match(Set dst (CountTrailingZerosV src));
23784   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23785   ins_cost(400);
23786   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23787   ins_encode %{
23788     int vlen_enc = vector_length_encoding(this, $src);
23789     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23790     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23791                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23792                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23793   %}
23794   ins_pipe( pipe_slow );
23795 %}
23796 
23797 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23798   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23799   match(Set dst (CountTrailingZerosV src));
23800   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23801   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23802   ins_encode %{
23803     int vlen_enc = vector_length_encoding(this, $src);
23804     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23805     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23806                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23807   %}
23808   ins_pipe( pipe_slow );
23809 %}
23810 
23811 
23812 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23813 
23814 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23815   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23816   effect(TEMP dst);
23817   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23818   ins_encode %{
23819     int vector_len = vector_length_encoding(this);
23820     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23821   %}
23822   ins_pipe( pipe_slow );
23823 %}
23824 
23825 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23826   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23827   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23828   effect(TEMP dst);
23829   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23830   ins_encode %{
23831     int vector_len = vector_length_encoding(this);
23832     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23833   %}
23834   ins_pipe( pipe_slow );
23835 %}
23836 
23837 // --------------------------------- Rotation Operations ----------------------------------
23838 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23839   match(Set dst (RotateLeftV src shift));
23840   match(Set dst (RotateRightV src shift));
23841   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23842   ins_encode %{
23843     int opcode      = this->ideal_Opcode();
23844     int vector_len  = vector_length_encoding(this);
23845     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23846     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23847   %}
23848   ins_pipe( pipe_slow );
23849 %}
23850 
23851 instruct vprorate(vec dst, vec src, vec shift) %{
23852   match(Set dst (RotateLeftV src shift));
23853   match(Set dst (RotateRightV src shift));
23854   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23855   ins_encode %{
23856     int opcode      = this->ideal_Opcode();
23857     int vector_len  = vector_length_encoding(this);
23858     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23859     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23860   %}
23861   ins_pipe( pipe_slow );
23862 %}
23863 
23864 // ---------------------------------- Masked Operations ------------------------------------
23865 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23866   predicate(!n->in(3)->bottom_type()->isa_pvectmask());
23867   match(Set dst (LoadVectorMasked mem mask));
23868   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23869   ins_encode %{
23870     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23871     int vlen_enc = vector_length_encoding(this);
23872     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23873   %}
23874   ins_pipe( pipe_slow );
23875 %}
23876 
23877 
23878 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23879   predicate(n->in(3)->bottom_type()->isa_pvectmask());
23880   match(Set dst (LoadVectorMasked mem mask));
23881   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23882   ins_encode %{
23883     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
23884     int vector_len = vector_length_encoding(this);
23885     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23886   %}
23887   ins_pipe( pipe_slow );
23888 %}
23889 
23890 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23891   predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
23892   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23893   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23894   ins_encode %{
23895     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23896     int vlen_enc = vector_length_encoding(src_node);
23897     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23898     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23899   %}
23900   ins_pipe( pipe_slow );
23901 %}
23902 
23903 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23904   predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
23905   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23906   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23907   ins_encode %{
23908     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23909     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23910     int vlen_enc = vector_length_encoding(src_node);
23911     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23912   %}
23913   ins_pipe( pipe_slow );
23914 %}
23915 
// VerifyVectorAlignment: checks that the bits of $addr selected by the
// immediate $mask are all zero, and stops the VM with a diagnostic message
// otherwise. $addr is passed through unchanged as the result (it is both the
// input and the Set target). The flags register is clobbered by the test.
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    // Zero result (equal): address is aligned, skip over the stop.
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
23930 
// VectorCmpMasked: compares $src1 and $src2 for equality under $mask and
// produces an int in $dst:
//   -1  if, for every mask bit, the lane is either not selected by $mask or
//       compares equal;
//   otherwise the bit index of the lowest clear bit in the compare result.
// Uses two mask-register temps and clobbers the flags register.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    // Both inputs must have the same vector length and element type.
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // ktmp2 = ~mask (bits of lanes that do not take part in the comparison)
    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    // Preload the "all equal" result.
    __ mov64($dst$$Register, -1L);
    // ktmp1 = per-lane (src1 == src2), computed under $mask
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    // kortest sets the carry flag when (ktmp2 | ktmp1) is all ones, i.e. no
    // selected lane differs; in that case keep -1 in dst.
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    // Otherwise: invert the compare result and count trailing zeros to get
    // the index of its lowest clear bit.
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}
23955 
23956 
// VectorMaskGen with the length in a register: builds a predicate-register
// mask in $dst from $len via the genmask() helper.
// Needs one general-purpose temp and clobbers the flags register.
// NOTE(review): presumably the low $len bits of the mask are set -- confirm
// against genmask().
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
23966 
23967 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23968   match(Set dst (VectorMaskGen len));
23969   format %{ "vector_mask_gen $len \t! vector mask generator" %}
23970   effect(TEMP temp);
23971   ins_encode %{
23972     if ($len$$constant > 0) {
23973       __ mov64($temp$$Register, right_n_bits($len$$constant));
23974       __ kmovql($dst$$KRegister, $temp$$Register);
23975     } else {
23976       __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
23977     }
23978   %}
23979   ins_pipe( pipe_slow );
23980 %}
23981 
23982 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23983   predicate(n->in(1)->bottom_type()->isa_pvectmask());
23984   match(Set dst (VectorMaskToLong mask));
23985   effect(TEMP dst, KILL cr);
23986   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23987   ins_encode %{
23988     int opcode = this->ideal_Opcode();
23989     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23990     int mask_len = Matcher::vector_length(this, $mask);
23991     int mask_size = mask_len * type2aelembytes(mbt);
23992     int vlen_enc = vector_length_encoding(this, $mask);
23993     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23994                              $dst$$Register, mask_len, mask_size, vlen_enc);
23995   %}
23996   ins_pipe( pipe_slow );
23997 %}
23998 
23999 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24000   predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24001   match(Set dst (VectorMaskToLong mask));
24002   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24003   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24004   ins_encode %{
24005     int opcode = this->ideal_Opcode();
24006     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24007     int mask_len = Matcher::vector_length(this, $mask);
24008     int vlen_enc = vector_length_encoding(this, $mask);
24009     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24010                              $dst$$Register, mask_len, mbt, vlen_enc);
24011   %}
24012   ins_pipe( pipe_slow );
24013 %}
24014 
24015 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24016   predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24017   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24018   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24019   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24020   ins_encode %{
24021     int opcode = this->ideal_Opcode();
24022     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24023     int mask_len = Matcher::vector_length(this, $mask);
24024     int vlen_enc = vector_length_encoding(this, $mask);
24025     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24026                              $dst$$Register, mask_len, mbt, vlen_enc);
24027   %}
24028   ins_pipe( pipe_slow );
24029 %}
24030 
24031 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24032   predicate(n->in(1)->bottom_type()->isa_pvectmask());
24033   match(Set dst (VectorMaskTrueCount mask));
24034   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24035   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24036   ins_encode %{
24037     int opcode = this->ideal_Opcode();
24038     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24039     int mask_len = Matcher::vector_length(this, $mask);
24040     int mask_size = mask_len * type2aelembytes(mbt);
24041     int vlen_enc = vector_length_encoding(this, $mask);
24042     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24043                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24044   %}
24045   ins_pipe( pipe_slow );
24046 %}
24047 
24048 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24049   predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24050   match(Set dst (VectorMaskTrueCount mask));
24051   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24052   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24053   ins_encode %{
24054     int opcode = this->ideal_Opcode();
24055     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24056     int mask_len = Matcher::vector_length(this, $mask);
24057     int vlen_enc = vector_length_encoding(this, $mask);
24058     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24059                              $tmp$$Register, mask_len, mbt, vlen_enc);
24060   %}
24061   ins_pipe( pipe_slow );
24062 %}
24063 
24064 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24065   predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24066   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24067   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24068   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24069   ins_encode %{
24070     int opcode = this->ideal_Opcode();
24071     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24072     int mask_len = Matcher::vector_length(this, $mask);
24073     int vlen_enc = vector_length_encoding(this, $mask);
24074     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24075                              $tmp$$Register, mask_len, mbt, vlen_enc);
24076   %}
24077   ins_pipe( pipe_slow );
24078 %}
24079 
24080 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24081   predicate(n->in(1)->bottom_type()->isa_pvectmask());
24082   match(Set dst (VectorMaskFirstTrue mask));
24083   match(Set dst (VectorMaskLastTrue mask));
24084   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24085   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24086   ins_encode %{
24087     int opcode = this->ideal_Opcode();
24088     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24089     int mask_len = Matcher::vector_length(this, $mask);
24090     int mask_size = mask_len * type2aelembytes(mbt);
24091     int vlen_enc = vector_length_encoding(this, $mask);
24092     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24093                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24094   %}
24095   ins_pipe( pipe_slow );
24096 %}
24097 
24098 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24099   predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24100   match(Set dst (VectorMaskFirstTrue mask));
24101   match(Set dst (VectorMaskLastTrue mask));
24102   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24103   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24104   ins_encode %{
24105     int opcode = this->ideal_Opcode();
24106     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24107     int mask_len = Matcher::vector_length(this, $mask);
24108     int vlen_enc = vector_length_encoding(this, $mask);
24109     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24110                              $tmp$$Register, mask_len, mbt, vlen_enc);
24111   %}
24112   ins_pipe( pipe_slow );
24113 %}
24114 
24115 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24116   predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24117   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24118   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24119   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24120   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24121   ins_encode %{
24122     int opcode = this->ideal_Opcode();
24123     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24124     int mask_len = Matcher::vector_length(this, $mask);
24125     int vlen_enc = vector_length_encoding(this, $mask);
24126     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24127                              $tmp$$Register, mask_len, mbt, vlen_enc);
24128   %}
24129   ins_pipe( pipe_slow );
24130 %}
24131 
24132 // --------------------------------- Compress/Expand Operations ---------------------------
24133 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24134   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24135   match(Set dst (CompressV src mask));
24136   match(Set dst (ExpandV src mask));
24137   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24138   format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24139   ins_encode %{
24140     int opcode = this->ideal_Opcode();
24141     int vlen_enc = vector_length_encoding(this);
24142     BasicType bt  = Matcher::vector_element_basic_type(this);
24143     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24144                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24145   %}
24146   ins_pipe( pipe_slow );
24147 %}
24148 
24149 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24150   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24151   match(Set dst (CompressV src mask));
24152   match(Set dst (ExpandV src mask));
24153   format %{ "vector_compress_expand $dst, $src, $mask" %}
24154   ins_encode %{
24155     int opcode = this->ideal_Opcode();
24156     int vector_len = vector_length_encoding(this);
24157     BasicType bt  = Matcher::vector_element_basic_type(this);
24158     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24159   %}
24160   ins_pipe( pipe_slow );
24161 %}
24162 
24163 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24164   match(Set dst (CompressM mask));
24165   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24166   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24167   ins_encode %{
24168     assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
24169     int mask_len = Matcher::vector_length(this);
24170     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24171   %}
24172   ins_pipe( pipe_slow );
24173 %}
24174 
24175 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24176 
24177 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24178   predicate(!VM_Version::supports_gfni());
24179   match(Set dst (ReverseV src));
24180   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24181   format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24182   ins_encode %{
24183     int vec_enc = vector_length_encoding(this);
24184     BasicType bt = Matcher::vector_element_basic_type(this);
24185     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24186                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24187   %}
24188   ins_pipe( pipe_slow );
24189 %}
24190 
// Vector bit reversal (ReverseV) selected when GFNI is available.
// A 64-bit constant is placed in the constant table and its address is handed
// to the macro assembler routine together with one vector temporary.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  // Comment marker placed after the tab, as in the other format strings of this file.
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int       vec_enc = vector_length_encoding(this);
    // NOTE(review): the constant looks like the GFNI bit-matrix operand; confirm
    // against vector_reverse_bit_gfni in the macro assembler.
    InternalAddress const_addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, const_addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
24205 
// Vector byte reversal (ReverseBytesV) without extra temporaries. Selected
// when AVX512BW is supported or the vector is shorter than 64 bytes.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    // Element type and vector length encoding are both read from this node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int       vec_enc = vector_length_encoding(this);
    __ vector_reverse_byte(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24218 
// Vector byte reversal (ReverseBytesV) for 64-byte vectors when AVX512BW is
// not supported. Reserves two vector temporaries and one integer register
// temporary; dst is also marked TEMP.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  // Comment marker placed after the tab, as in the other format strings of this file.
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int       vec_enc = vector_length_encoding(this);
    __ vector_reverse_byte64(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24232 
24233 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24234 
// Unmasked CountLeadingZerosV for the element type / vector size combinations
// accepted by is_clz_non_subword_predicate_evex. No temporaries are needed:
// every temp slot of the macro assembler routine receives a no-register value
// and k0 is passed as the mask register.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    // Both properties are taken from the source operand.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int       vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg, k0, noreg, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24248 
// Masked CountLeadingZerosV for the element type / vector size combinations
// accepted by is_clz_non_subword_predicate_evex. The source is first copied
// into dst, then the count is emitted under the supplied opmask.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    // Both properties are taken from the source operand.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int       vec_enc = vector_length_encoding(this, $src);
    // Seed dst with the source value before the masked operation writes to it.
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg,
                                       $mask$$KRegister, noreg, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24263 
// CountLeadingZerosV on T_SHORT elements. Requires AVX512CD and either
// AVX512VL or a 64-byte vector. Two vector temporaries are reserved and dst
// is also marked TEMP; k0 is passed as the mask register.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  // Comment marker placed after the tab, as in the other format strings of this file.
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // Both properties are taken from the source operand.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int       vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24279 
// CountLeadingZerosV on T_BYTE elements when AVX512VL+BW is supported.
// Reserves three vector temporaries, one opmask temporary and one pointer
// sized register temporary; dst is also marked TEMP.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  // Comment marker placed after the tab, as in the other format strings of this file.
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    // Both properties are taken from the source operand.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int       vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24294 
// CountLeadingZerosV on T_INT elements for vectors shorter than 64 bytes when
// AVX512VL is not supported. Three vector temporaries are reserved; no general
// register is needed, so noreg is passed in that slot.
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    // Both properties are taken from the source operand.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int       vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                                      noreg, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24309 
// CountLeadingZerosV on every element type other than T_INT for vectors
// shorter than 64 bytes when AVX512VL is not supported. Reserves three vector
// temporaries plus one pointer sized register temporary.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    // Both properties are taken from the source operand.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int       vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                                      $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24324 
24325 // ---------------------------------- Vector Masked Operations ------------------------------------
24326 
// Masked vector add, register form. Covers AddVB, AddVS, AddVI, AddVL, AddVF
// and AddVD; dst is the first input of the matched node as well as the
// result, so the operation is performed in place under the opmask.
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    // One encoder serves all match rules: the ideal opcode and the element
    // type are read from the node and forwarded to evmasked_op.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24344 
// Masked vector add, memory form. Same opcodes as the register form, but the
// second input is a LoadVector that is folded into the instruction as a
// memory operand.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    // One encoder serves all match rules: the ideal opcode and the element
    // type are read from the node and forwarded to evmasked_op.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24362 
// Masked vector XOR, register form. dst is the first input of the matched
// XorV node as well as the result.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24375 
// Masked vector XOR, memory form. The second input is a LoadVector folded
// into the instruction as a memory operand.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24388 
// Masked vector OR, register form. dst is the first input of the matched
// OrV node as well as the result.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24401 
// Masked vector OR, memory form. The second input is a LoadVector folded
// into the instruction as a memory operand.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24414 
// Masked vector AND, register form. dst is the first input of the matched
// AndV node as well as the result.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24427 
// Masked vector AND, memory form. The second input is a LoadVector folded
// into the instruction as a memory operand.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24440 
// Masked vector subtract, register form. Covers SubVB, SubVS, SubVI, SubVL,
// SubVF and SubVD; dst is the first input of the matched node as well as the
// result.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    // One encoder serves all match rules: the ideal opcode and the element
    // type are read from the node and forwarded to evmasked_op.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24458 
// Masked vector subtract, memory form. Same opcodes as the register form, but
// the second input is a LoadVector folded into the instruction as a memory
// operand.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    // One encoder serves all match rules: the ideal opcode and the element
    // type are read from the node and forwarded to evmasked_op.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24476 
// Masked vector multiply, register form. Covers MulVS, MulVI, MulVL, MulVF
// and MulVD (there is no byte rule here); dst is the first input of the
// matched node as well as the result.
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    // One encoder serves all match rules: the ideal opcode and the element
    // type are read from the node and forwarded to evmasked_op.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24493 
// Masked vector multiply, memory form. Same opcodes as the register form, but
// the second input is a LoadVector folded into the instruction as a memory
// operand.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    // One encoder serves all match rules: the ideal opcode and the element
    // type are read from the node and forwarded to evmasked_op.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24510 
// Masked vector square root for SqrtVF and SqrtVD. The operation is unary, so
// dst is supplied to evmasked_op as the destination and as both source slots.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24524 
// Masked vector divide, register form, for DivVF and DivVD. dst is the first
// input of the matched node as well as the result.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24538 
// Masked vector divide, memory form, for DivVF and DivVD. The second input is
// a LoadVector folded into the instruction as a memory operand.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24552 
24553 
// Masked vector rotate by an 8-bit immediate. Despite the name, this rule
// matches both RotateLeftV and RotateRightV; the ideal opcode forwarded to
// evmasked_op distinguishes the two.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24567 
// Masked vector rotate with the rotate amounts held in a vector register.
// Despite the name, this rule matches both RotateLeftV and RotateRightV; the
// ideal opcode forwarded to evmasked_op distinguishes the two.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24581 
// Masked vector left shift by an 8-bit immediate for LShiftVS, LShiftVI and
// LShiftVL. The immediate appears in the ideal graph wrapped in LShiftCntV.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24596 
// Masked vector left shift with the count in a vector register, selected when
// the node is not a variable shift. Covers LShiftVS, LShiftVI and LShiftVL.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    // The trailing argument is false, in line with the non-variable-shift predicate.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24612 
// Masked vector left shift with the count in a vector register, selected when
// the node is a variable shift. Covers LShiftVS, LShiftVI and LShiftVL.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    // The trailing argument is true, in line with the variable-shift predicate.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24628 
// Masked vector right shift (RShiftVS, RShiftVI, RShiftVL) by an 8-bit
// immediate. The immediate appears in the ideal graph wrapped in RShiftCntV.
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24643 
// Masked vector right shift (RShiftVS, RShiftVI, RShiftVL) with the count in
// a vector register, selected when the node is not a variable shift.
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    // The trailing argument is false, in line with the non-variable-shift predicate.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24659 
// Masked vector right shift (RShiftVS, RShiftVI, RShiftVL) with the count in
// a vector register, selected when the node is a variable shift.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    // The trailing argument is true, in line with the variable-shift predicate.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24675 
// Masked vector unsigned right shift (URShiftVS, URShiftVI, URShiftVL) by an
// 8-bit immediate. The immediate appears in the ideal graph wrapped in
// RShiftCntV.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24690 
// Masked vector unsigned right shift (URShiftVS, URShiftVI, URShiftVL) with
// the count in a vector register, selected when the node is not a variable
// shift.
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    // The trailing argument is false, in line with the non-variable-shift predicate.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24706 
// Masked vector unsigned right shift (URShiftVS, URShiftVI, URShiftVL) with
// the count in a vector register, selected when the node is a variable shift.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    // The trailing argument is true, in line with the variable-shift predicate.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24722 
// Masked vector maximum, register form. dst is the first input of the matched
// MaxV node as well as the result.
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24735 
// Masked vector maximum, memory form. The second input is a LoadVector folded
// into the instruction as a memory operand.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24748 
// Masked vector minimum, register form. dst is the first input of the matched
// MinV node as well as the result.
instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24761 
// Masked vector minimum, memory form. The second input is a LoadVector folded
// into the instruction as a memory operand.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24774 
// Masked VectorRearrange, register form. dst is the first input of the
// matched node as well as the result. Unlike the neighbouring masked rules,
// the boolean handed to evmasked_op is false here.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   false, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24787 
// Masked vector absolute value for AbsVB, AbsVS, AbsVI and AbsVL. The
// operation is unary, so dst is supplied to evmasked_op as the destination
// and as both source slots.
instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24803 
// Masked fused multiply-add for FmaVF and FmaVD, register form. dst is the
// first input of the matched node as well as the result; src2 and src3 are
// passed to evmasked_op as the two source operands.
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24818 
// Masked fused multiply-add for FmaVF and FmaVD, memory form. The third input
// is a LoadVector folded into the instruction as a memory operand.
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    // Opcode, element type and length encoding all come from this node.
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_enc   = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24833 
// Masked vector compare producing an opmask register. The element type and
// vector length are taken from src1; cond is the BoolTest condition constant
// and mask is handed to the compare instruction as its mask operand.
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    const int       vec_len = vector_length_encoding(this, $src1);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    KRegister   result_k = $dst$$KRegister;
    KRegister   mask_k   = $mask$$KRegister;
    XMMRegister lhs      = $src1$$XMMRegister;
    XMMRegister rhs      = $src2$$XMMRegister;

    // Pick the compare instruction by element type.
    switch (elem_bt) {
      case T_BYTE:
      case T_SHORT:
      case T_INT:
      case T_LONG: {
        // Integral lanes share the predicate translation; only the emitted
        // instruction differs by element width.
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        const Assembler::ComparisonPredicate int_pred = booltest_pred_to_comparison_pred($cond$$constant);
        if (elem_bt == T_BYTE) {
          __ evpcmpb(result_k, mask_k, lhs, rhs, int_pred, is_signed, vec_len);
        } else if (elem_bt == T_SHORT) {
          __ evpcmpw(result_k, mask_k, lhs, rhs, int_pred, is_signed, vec_len);
        } else if (elem_bt == T_INT) {
          __ evpcmpd(result_k, mask_k, lhs, rhs, int_pred, is_signed, vec_len);
        } else {
          __ evpcmpq(result_k, mask_k, lhs, rhs, int_pred, is_signed, vec_len);
        }
        break;
      }
      case T_FLOAT:
      case T_DOUBLE: {
        // Floating-point lanes use the FP predicate translation.
        const Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        if (elem_bt == T_FLOAT) {
          __ evcmpps(result_k, mask_k, lhs, rhs, fp_pred, vec_len);
        } else {
          __ evcmppd(result_k, mask_k, lhs, rhs, fp_pred, vec_len);
        }
        break;
      }
      default: assert(false, "%s", type2name(elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
24883 
// MaskAll from an int source into an opmask register, for vectors of at
// most 32 lanes.
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    KRegister result_k   = $dst$$KRegister;
    __ vector_maskall_operation(result_k, $src$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
24894 
// Mask NOT, expressed in the ideal graph as XorVMask of src with
// MaskAll(-1), for masks shorter than 8 lanes when AVX512DQ is available.
// Uses a general-purpose and an opmask temporary.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    const uint lane_count = Matcher::vector_length(this);
    KRegister  result_k   = $dst$$KRegister;
    KRegister  scratch_k  = $ktmp$$KRegister;
    __ knot(lane_count, result_k, $src$$KRegister, scratch_k, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24906 
// Mask NOT (XorVMask of src with MaskAll(-1)) without temporaries. Applies
// to 8 lanes with AVX512DQ, to exactly 16 lanes, and to more than 16 lanes
// with AVX512BW.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    const uint lane_count = Matcher::vector_length(this);
    KRegister  result_k   = $dst$$KRegister;
    __ knot(lane_count, result_k, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24919 
// VectorLongToMask into a vector register (result type is not a predicate
// mask type) for at most 8 lanes. No vector temporary is needed, so xnoreg
// is passed in its place.
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    const int vec_len    = vector_length_encoding(lane_count);
    Register  scratch1   = $rtmp1$$Register;
    Register  scratch2   = $rtmp2$$Register;
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, scratch1,
                              scratch2, xnoreg, lane_count, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
24933 
24934 
// VectorLongToMask into a vector register (result type is not a predicate
// mask type) for more than 8 lanes. Needs an extra vector temporary and
// kills the flags register.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    assert(lane_count <= 32, "invalid mask length");
    const int   vec_len     = vector_length_encoding(lane_count);
    Register    scratch1    = $rtmp1$$Register;
    Register    scratch2    = $rtmp2$$Register;
    XMMRegister vec_scratch = $xtmp1$$XMMRegister;
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, scratch1,
                              scratch2, vec_scratch, lane_count, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
24949 
// VectorLongToMask when the result is a predicate (opmask) type: a single
// move from the general-purpose register into the opmask register.
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_pvectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    KRegister result_k = $dst$$KRegister;
    Register  bits     = $src$$Register;
    __ kmov(result_k, bits);
  %}
  ins_pipe( pipe_slow );
%}
24959 
// Bitwise AND / OR / XOR of two opmask registers. Both inputs must have the
// same mask type. kscratch is reserved as a TEMP but is not referenced by
// the encoding.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* lhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* rhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(lhs_mask->bottom_type(), rhs_mask->bottom_type()), "Mask types must be equal");
    uint lane_count = Matcher::vector_length(this);
    if (lane_count < 16 && !VM_Version::supports_avx512dq()) {
      // Without AVX512DQ, masks shorter than 16 lanes are handled as
      // 16-lane masks.
      lane_count = 16;
    }
    __ masked_op(this->ideal_Opcode(), lane_count, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24976 
// Masked three-input logic operation (MacroLogicV), all-register form.
// dst is the first input and the result; func is the 8-bit function
// selector constant.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int       vec_len = vector_length_encoding(this);
    XMMRegister     dst_reg = $dst$$XMMRegister;
    __ evpternlog(dst_reg, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, elem_bt, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
24988 
// Masked three-input logic operation (MacroLogicV) with the third input
// loaded from memory. dst is the first input and the result; func is the
// 8-bit function selector constant.
//
// The memory operand is wrapped in LoadVector in the match rule, as in the
// other memory-form vector rules in this file (for example vfma_mem_masked).
// A bare memory operand in a vector input position would make this rule
// textually identical to the register form and unable to fold a load.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary (LoadVector src3) (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25000 
// CastVV on an opmask (kReg) operand. The value stays in place: the
// instruction has size 0, cost 0 and an empty encoding.
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
25011 
// CastVV on a vec operand. The value stays in place: the instruction has
// size 0, cost 0 and an empty encoding.
instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
25022 
// CastVV on a legVec operand. The value stays in place: the instruction
// has size 0, cost 0 and an empty encoding.
instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
25033 
// IsInfiniteF: classify the float with vfpclassss into an opmask temporary,
// then move the mask bits into the int result register.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    KRegister class_mask = $ktmp$$KRegister;
    // imm8 0x18 sets bits 3 and 4, which select the +Infinity and -Infinity
    // classes of the VFPCLASS instruction.
    __ vfpclassss(class_mask, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, class_mask);
  %}
  ins_pipe(pipe_slow);
%}
25045 
// IsInfiniteD: classify the double with vfpclasssd into an opmask
// temporary, then move the mask bits into the int result register.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    KRegister class_mask = $ktmp$$KRegister;
    // imm8 0x18 sets bits 3 and 4, which select the +Infinity and -Infinity
    // classes of the VFPCLASS instruction.
    __ vfpclasssd(class_mask, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, class_mask);
  %}
  ins_pipe(pipe_slow);
%}
25057 
// Signed saturating add/subtract on subword element vectors, register form.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_len   = vector_length_encoding(this);
    // The boolean argument is false for this signed rule.
    __ vector_saturating_op(ideal_opc, lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25073 
// Unsigned saturating add/subtract on subword element vectors, register
// form.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_len   = vector_length_encoding(this);
    // The boolean argument is true for this unsigned rule.
    __ vector_saturating_op(ideal_opc, lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25089 
// Signed saturating add/subtract on non-subword element vectors. Chosen
// for 64-byte vectors, or for any length when AVX512VL is supported. Uses
// two vector and two opmask temporaries.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_len   = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_evex(ideal_opc, lane_bt,
                                        $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister,
                                        vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25109 
// Signed saturating add/subtract on non-subword element vectors of at most
// 32 bytes when AVX512VL is not supported. Uses four vector temporaries.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_len   = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_avx(ideal_opc, lane_bt,
                                       $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                       vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25128 
// Unsigned saturating add on non-subword element vectors. Chosen for
// 64-byte vectors, or for any length when AVX512VL is supported. Uses two
// vector temporaries and one opmask temporary.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int       vec_len = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_evex(lane_bt,
                                              $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                              $ktmp$$KRegister, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25145 
// Unsigned saturating add on non-subword element vectors of at most 32
// bytes when AVX512VL is not supported. Uses three vector temporaries.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int       vec_len = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_avx(lane_bt,
                                             $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                                             vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25162 
// Unsigned saturating subtract on non-subword element vectors. Chosen for
// 64-byte vectors, or for any length when AVX512VL is supported. Uses one
// opmask temporary.
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int       vec_len = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_evex(lane_bt,
                                              $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $ktmp$$KRegister, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25179 
// Unsigned saturating subtract on non-subword element vectors of at most
// 32 bytes when AVX512VL is not supported. Uses two vector temporaries.
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int       vec_len = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_avx(lane_bt,
                                             $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                             vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25196 
// Signed saturating add/subtract on subword element vectors with the
// second operand loaded from memory.
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_len   = vector_length_encoding(this);
    // The boolean argument is false for this signed rule.
    __ vector_saturating_op(ideal_opc, lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25212 
// Unsigned saturating add/subtract on subword element vectors with the
// second operand loaded from memory.
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_len   = vector_length_encoding(this);
    // The boolean argument is true for this unsigned rule.
    __ vector_saturating_op(ideal_opc, lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25228 
// Masked signed saturating add/subtract on subword element vectors,
// register form. dst is the first input and the result.
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_len   = vector_length_encoding(this);
    XMMRegister     dst_reg   = $dst$$XMMRegister;
    // dst is passed as both destination and first source; the two boolean
    // arguments are false, true for this signed rule.
    __ evmasked_saturating_op(ideal_opc, lane_bt, $mask$$KRegister, dst_reg,
                              dst_reg, $src$$XMMRegister, false, true, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
25243 
// Masked unsigned saturating add/subtract on subword element vectors,
// register form. dst is the first input and the result.
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_len   = vector_length_encoding(this);
    XMMRegister     dst_reg   = $dst$$XMMRegister;
    // dst is passed as both destination and first source; the two boolean
    // arguments are true, true for this unsigned rule.
    __ evmasked_saturating_op(ideal_opc, lane_bt, $mask$$KRegister, dst_reg,
                              dst_reg, $src$$XMMRegister, true, true, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
25258 
// Masked signed saturating add/subtract on subword element vectors with
// the second operand loaded from memory. dst is the first input and the
// result.
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_len   = vector_length_encoding(this);
    XMMRegister     dst_reg   = $dst$$XMMRegister;
    // dst is passed as both destination and first source; the two boolean
    // arguments are false, true for this signed rule.
    __ evmasked_saturating_op(ideal_opc, lane_bt, $mask$$KRegister, dst_reg,
                              dst_reg, $src$$Address, false, true, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
25273 
// Masked unsigned saturating add/subtract on subword element vectors with
// the second operand loaded from memory. dst is the first input and the
// result.
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int       ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt   = Matcher::vector_element_basic_type(this);
    const int       vec_len   = vector_length_encoding(this);
    XMMRegister     dst_reg   = $dst$$XMMRegister;
    // dst is passed as both destination and first source; the two boolean
    // arguments are true, true for this unsigned rule.
    __ evmasked_saturating_op(ideal_opc, lane_bt, $mask$$KRegister, dst_reg,
                              dst_reg, $src$$Address, true, true, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
25288 
// SelectFromTwoVector: the index vector register is both the first input
// and the result register.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int       vec_len = vector_length_encoding(this);
    XMMRegister     idx_reg = $index$$XMMRegister;
    __ select_from_two_vectors_evex(lane_bt, idx_reg, $src1$$XMMRegister, $src2$$XMMRegister, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25300 
// ReinterpretS2HF: move the value from a general-purpose register into an
// XMM register with evmovw.
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    XMMRegister hf_reg  = $dst$$XMMRegister;
    Register    int_reg = $src$$Register;
    __ evmovw(hf_reg, int_reg);
  %}
  ins_pipe(pipe_slow);
%}
25310 
// ReinterpretHF2S: move the value from an XMM register into a
// general-purpose register with evmovw, then narrow the result as T_SHORT.
instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    Register int_reg = $dst$$Register;
    __ evmovw(int_reg, $src$$XMMRegister);
    __ narrow_subword_type(int_reg, T_SHORT);
  %}
  ins_pipe(pipe_slow);
%}
25321 
// Fused ConvF2HF + ReinterpretS2HF: a single vcvtps2ph between XMM
// registers with immediate 0x04 and 128-bit vector length.
instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    XMMRegister hf_reg    = $dst$$XMMRegister;
    XMMRegister float_reg = $src$$XMMRegister;
    __ vcvtps2ph(hf_reg, float_reg, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25331 
// Fused ReinterpretHF2S + ConvHF2F: a single vcvtph2ps between XMM
// registers with 128-bit vector length.
instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    XMMRegister float_reg = $dst$$XMMRegister;
    XMMRegister hf_reg    = $src$$XMMRegister;
    __ vcvtph2ps(float_reg, hf_reg, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25341 
// Scalar half-float square root (SqrtHF), emitted as vsqrtsh.
instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vsqrtsh(result, input);
  %}
  ins_pipe(pipe_slow);
%}
25351 
// Scalar half-float add / divide / multiply / subtract. The ideal opcode
// is forwarded to the shared efp16sh emitter, which picks the instruction.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int   ideal_opc = this->ideal_Opcode();
    XMMRegister result    = $dst$$XMMRegister;
    __ efp16sh(ideal_opc, result, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25365 
// Scalar half-float min/max when AVX10.2 is supported. No temporaries are
// needed; k0 is passed as the mask register argument.
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int   ideal_opc = this->ideal_Opcode();
    XMMRegister result    = $dst$$XMMRegister;
    __ sminmax_fp16_avx10_2(ideal_opc, result, $src1$$XMMRegister, $src2$$XMMRegister, k0);
  %}
  ins_pipe( pipe_slow );
%}
25379 
// Scalar half-float min/max when AVX10.2 is not supported. Uses one opmask
// temporary and two XMM temporaries.
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int   ideal_opc = this->ideal_Opcode();
    XMMRegister result    = $dst$$XMMRegister;
    KRegister   scratch_k = $ktmp$$KRegister;
    __ sminmax_fp16(ideal_opc, result, $src1$$XMMRegister, $src2$$XMMRegister, scratch_k,
                    $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25395 
// Scalar half-float fused multiply-add. Per the format string, the result
// is dst * src1 + src2, with dst as both an input and the result register.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF  src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc        = $dst$$XMMRegister;
    XMMRegister addend     = $src2$$XMMRegister;
    XMMRegister multiplier = $src1$$XMMRegister;
    // The emitter receives src2 ahead of src1.
    __ vfmadd132sh(acc, addend, multiplier);
  %}
  ins_pipe( pipe_slow );
%}
25406 
25407 
// Vector half-float square root (SqrtVHF), register form.
instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    const int   vec_len = vector_length_encoding(this);
    XMMRegister result  = $dst$$XMMRegister;
    __ evsqrtph(result, $src$$XMMRegister, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25418 
// Vector half-float square root (SqrtVHF) with the input loaded from
// memory; the load appears under a VectorReinterpret in the match rule.
instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    const int   vec_len = vector_length_encoding(this);
    XMMRegister result  = $dst$$XMMRegister;
    __ evsqrtph(result, $src$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25429 
// Vector half-float add / divide / multiply / subtract, register form. The
// ideal opcode is forwarded to the shared evfp16ph emitter.
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int   ideal_opc = this->ideal_Opcode();
    const int   vec_len   = vector_length_encoding(this);
    XMMRegister result    = $dst$$XMMRegister;
    __ evfp16ph(ideal_opc, result, $src1$$XMMRegister, $src2$$XMMRegister, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25444 
25445 
// Vector half-float add / divide / multiply / subtract with the second
// operand loaded from memory (under a VectorReinterpret in the match rule).
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    const int   ideal_opc = this->ideal_Opcode();
    const int   vec_len   = vector_length_encoding(this);
    XMMRegister result    = $dst$$XMMRegister;
    __ evfp16ph(ideal_opc, result, $src1$$XMMRegister, $src2$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25460 
// Vector half-float fused multiply-add, register form. Per the format
// string, the result is dst * src1 + src2, with dst as both an input and
// the result register.
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    const int   vec_len    = vector_length_encoding(this);
    XMMRegister acc        = $dst$$XMMRegister;
    XMMRegister addend     = $src2$$XMMRegister;
    XMMRegister multiplier = $src1$$XMMRegister;
    // The emitter receives src2 ahead of src1.
    __ evfmadd132ph(acc, addend, multiplier, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
25471 
// Vector half-float fused multiply-add with src1 loaded from memory (under
// a VectorReinterpret in the match rule). Per the format string, the
// result is dst * src1 + src2.
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    const int   vec_len = vector_length_encoding(this);
    XMMRegister acc     = $dst$$XMMRegister;
    XMMRegister addend  = $src2$$XMMRegister;
    // The emitter receives src2 ahead of the memory operand src1.
    __ evfmadd132ph(acc, addend, $src1$$Address, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
25482 
// Vector half-float min/max with the second operand loaded from memory,
// when AVX10.2 is supported. k0 is passed as the mask register argument.
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    const int   ideal_opc = this->ideal_Opcode();
    const int   vec_len   = vector_length_encoding(this);
    XMMRegister result    = $dst$$XMMRegister;
    __ vminmax_fp16_avx10_2(ideal_opc, result, $src1$$XMMRegister, $src2$$Address,
                            k0, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
25497 
// Vector half-float min/max, register form, when AVX10.2 is supported.
// k0 is passed as the mask register argument.
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int   ideal_opc = this->ideal_Opcode();
    const int   vec_len   = vector_length_encoding(this);
    XMMRegister result    = $dst$$XMMRegister;
    __ vminmax_fp16_avx10_2(ideal_opc, result, $src1$$XMMRegister, $src2$$XMMRegister,
                            k0, vec_len);
  %}
  ins_pipe( pipe_slow );
%}
25512 
25513 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25514 %{
        // Register-register packed fp16 min/max used when
        // VM_Version::supports_avx10_2() is false (the complement of the predicate on
        // vector_minmax_HF_reg_avx10_2, which matches the same ideal nodes).
        // Needs one mask register and two vector registers as scratch; dst is
        // declared TEMP_DEF.
25515   predicate(!VM_Version::supports_avx10_2());
25516   match(Set dst (MinVHF src1 src2));
25517   match(Set dst (MaxVHF src1 src2));
25518   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25519   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25520   ins_encode %{
25521     int vlen_enc = vector_length_encoding(this);
          // MinVHF or MaxVHF, depending on which match rule produced this node.
25522     int opcode = this->ideal_Opcode();
          // NOTE(review): src2 is passed ahead of src1 here, unlike the avx10_2
          // variants; presumably this follows the parameter order expected by
          // vminmax_fp16 -- confirm against its declaration.
25523     __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25524                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25525   %}
25526   ins_pipe( pipe_slow );
25527 %}
25528 
25529 //----------PEEPHOLE RULES-----------------------------------------------------
25530 // These must follow all instruction definitions as they use the names
25531 // defined in the instructions definitions.
25532 //
25533 // peeppredicate ( rule_predicate );
25534 // // the predicate that must hold; otherwise the peephole rule is ignored
25535 //
25536 // peepmatch ( root_instr_name [preceding_instruction]* );
25537 //
25538 // peepprocedure ( procedure_name );
25539 // // provide a procedure name to perform the optimization; the procedure should
25540 // // reside in the architecture dependent peephole file. The method has the
25541 // // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
25542 // // with the arguments being the basic block, the current node index inside the
25543 // // block, the register allocator, a function which upon invocation returns a
25544 // // new node defined in peepreplace, and the rules of the nodes appearing in the
25545 // // corresponding peepmatch. The function returns true if successful, else it
25546 // // returns false
25547 //
25548 // peepconstraint %{
25549 // (instruction_number.operand_name relational_op instruction_number.operand_name
25550 //  [, ...] );
25551 // // instruction numbers are zero-based using left to right order in peepmatch
25552 //
25553 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25554 // // provide an instruction_number.operand_name for each operand that appears
25555 // // in the replacement instruction's match rule
25556 //
25557 // ---------VM FLAGS---------------------------------------------------------
25558 //
25559 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25560 //
25561 // Each peephole rule is given an identifying number starting with zero and
25562 // increasing by one in the order seen by the parser.  An individual peephole
25563 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25564 // on the command-line.
25565 //
25566 // ---------CURRENT LIMITATIONS----------------------------------------------
25567 //
25568 // Only transformations inside a basic block (do we need more for peephole?)
25569 //
25570 // ---------EXAMPLE----------------------------------------------------------
25571 //
25572 // // pertinent parts of existing instructions in architecture description
25573 // instruct movI(rRegI dst, rRegI src)
25574 // %{
25575 //   match(Set dst (CopyI src));
25576 // %}
25577 //
25578 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25579 // %{
25580 //   match(Set dst (AddI dst src));
25581 //   effect(KILL cr);
25582 // %}
25583 //
25584 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25585 // %{
25586 //   match(Set dst (AddI dst src));
25587 // %}
25588 //
25589 // 1. Simple replacement
25590 // - Only match adjacent instructions in same basic block
25591 // - Only equality constraints
25592 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25593 // - Only one replacement instruction
25594 //
25595 // // Change (inc mov) to lea
25596 // peephole %{
25597 //   // lea should only be emitted when beneficial
25598 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25599 //   // increment preceded by register-register move
25600 //   peepmatch ( incI_rReg movI );
25601 //   // require that the destination register of the increment
25602 //   // match the destination register of the move
25603 //   peepconstraint ( 0.dst == 1.dst );
25604 //   // construct a replacement instruction that sets
25605 //   // the destination to ( move's source register + one )
25606 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25607 // %}
25608 //
25609 // 2. Procedural replacement
25610 // - More flexible in finding relevant nodes
25611 // - More flexible constraints
25612 // - More flexible transformations
25613 // - May utilise architecture-dependent API more effectively
25614 // - Currently only one replacement instruction due to adlc parsing capabilities
25615 //
25616 // // Change (inc mov) to lea
25617 // peephole %{
25618 //   // lea should only be emitted when beneficial
25619 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25620 //   // the rule numbers of these nodes inside are passed into the function below
25621 //   peepmatch ( incI_rReg movI );
25622 //   // the method that takes the responsibility of transformation
25623 //   peepprocedure ( inc_mov_to_lea );
25624 //   // the replacement is a leaI_rReg_immI; a lambda which creates this node
25625 //   // when invoked is passed into the function above
25626 //   peepreplace ( leaI_rReg_immI() );
25627 // %}
25628 
25629 // These instructions are not matched by the matcher but are used by the peephole rules
25630 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25631 %{
        // 32-bit "dst = src1 + src2" expressed as a leal. predicate(false) keeps the
        // matcher from ever selecting this rule; it is only referenced as a
        // peepreplace target by the peephole rules in this file.
25632   predicate(false);
25633   match(Set dst (AddI src1 src2));
25634   format %{ "leal    $dst, [$src1 + $src2]" %}
25635   ins_encode %{
25636     Register dst = $dst$$Register;
25637     Register src1 = $src1$$Register;
25638     Register src2 = $src2$$Register;
          // Use src1 as the base register unless it is rbp or r13; in that case swap
          // the roles so that src2 becomes the base and src1 the index.
          // NOTE(review): presumably rbp/r13 are avoided as base because they cannot
          // be encoded as a base register without a displacement -- confirm.
25639     if (src1 != rbp && src1 != r13) {
25640       __ leal(dst, Address(src1, src2, Address::times_1));
25641     } else {
            // The swapped form relies on src2 not being rbp/r13 as well.
25642       assert(src2 != rbp && src2 != r13, "");
25643       __ leal(dst, Address(src2, src1, Address::times_1));
25644     }
25645   %}
25646   ins_pipe(ialu_reg_reg);
25647 %}
25648 
25649 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25650 %{
        // 32-bit "dst = src1 + constant" expressed as a leal. predicate(false) keeps
        // the matcher from ever selecting this rule; it is only referenced as a
        // peepreplace target by the peephole rules in this file.
25651   predicate(false);
25652   match(Set dst (AddI src1 src2));
25653   format %{ "leal    $dst, [$src1 + $src2]" %}
25654   ins_encode %{
          // Base register src1 plus the immediate src2 used as the displacement.
25655     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25656   %}
25657   ins_pipe(ialu_reg_reg);
25658 %}
25659 
25660 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25661 %{
        // 32-bit "dst = src << shift" expressed as a leal with a scaled index.
        // predicate(false) keeps the matcher from ever selecting this rule; it is
        // only referenced as a peepreplace target by the peephole rules in this file.
25662   predicate(false);
25663   match(Set dst (LShiftI src shift));
25664   format %{ "leal    $dst, [$src << $shift]" %}
25665   ins_encode %{
          // The shift constant is reinterpreted directly as the address scale factor.
25666     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25667     Register src = $src$$Register;
          // A scale of two is emitted as [src + src*1] when src may serve as a base
          // (i.e. is neither rbp nor r13); every other case uses the base-less form
          // [src*scale].
          // NOTE(review): presumably the base+index form is preferred for its
          // encoding -- confirm.
25668     if (scale == Address::times_2 && src != rbp && src != r13) {
25669       __ leal($dst$$Register, Address(src, src, Address::times_1));
25670     } else {
25671       __ leal($dst$$Register, Address(noreg, src, scale));
25672     }
25673   %}
25674   ins_pipe(ialu_reg_reg);
25675 %}
25676 
25677 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25678 %{
        // 64-bit "dst = src1 + src2" expressed as a leaq. predicate(false) keeps the
        // matcher from ever selecting this rule; it is only referenced as a
        // peepreplace target by the peephole rules in this file.
25679   predicate(false);
25680   match(Set dst (AddL src1 src2));
25681   format %{ "leaq    $dst, [$src1 + $src2]" %}
25682   ins_encode %{
25683     Register dst = $dst$$Register;
25684     Register src1 = $src1$$Register;
25685     Register src2 = $src2$$Register;
          // Use src1 as the base register unless it is rbp or r13; in that case swap
          // the roles so that src2 becomes the base and src1 the index.
          // NOTE(review): presumably rbp/r13 are avoided as base because they cannot
          // be encoded as a base register without a displacement -- confirm.
25686     if (src1 != rbp && src1 != r13) {
25687       __ leaq(dst, Address(src1, src2, Address::times_1));
25688     } else {
            // The swapped form relies on src2 not being rbp/r13 as well.
25689       assert(src2 != rbp && src2 != r13, "");
25690       __ leaq(dst, Address(src2, src1, Address::times_1));
25691     }
25692   %}
25693   ins_pipe(ialu_reg_reg);
25694 %}
25695 
25696 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25697 %{
        // 64-bit "dst = src1 + constant" expressed as a leaq. predicate(false) keeps
        // the matcher from ever selecting this rule; it is only referenced as a
        // peepreplace target by the peephole rules in this file.
25698   predicate(false);
25699   match(Set dst (AddL src1 src2));
25700   format %{ "leaq    $dst, [$src1 + $src2]" %}
25701   ins_encode %{
          // Base register src1 plus the immediate src2 used as the displacement.
25702     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25703   %}
25704   ins_pipe(ialu_reg_reg);
25705 %}
25706 
25707 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25708 %{
        // 64-bit "dst = src << shift" expressed as a leaq with a scaled index.
        // predicate(false) keeps the matcher from ever selecting this rule; it is
        // only referenced as a peepreplace target by the peephole rules in this file.
25709   predicate(false);
25710   match(Set dst (LShiftL src shift));
25711   format %{ "leaq    $dst, [$src << $shift]" %}
25712   ins_encode %{
          // The shift constant is reinterpreted directly as the address scale factor.
25713     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25714     Register src = $src$$Register;
          // A scale of two is emitted as [src + src*1] when src may serve as a base
          // (i.e. is neither rbp nor r13); every other case uses the base-less form
          // [src*scale].
          // NOTE(review): presumably the base+index form is preferred for its
          // encoding -- confirm.
25715     if (scale == Address::times_2 && src != rbp && src != r13) {
25716       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25717     } else {
25718       __ leaq($dst$$Register, Address(noreg, src, scale));
25719     }
25720   %}
25721   ins_pipe(ialu_reg_reg);
25722 %}
25723 
25724 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25725 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25726 // processors with at least partial ALU support for lea
25727 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25728 // beneficial for processors with full ALU support
25729 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25730 
25731 peephole
25732 %{
        // Int add (register + register): lea_coalesce_reg may replace the matched
        // addI_rReg with a leaI_rReg_rReg_peep. Enabled only when
        // supports_fast_2op_lea() holds.
25733   peeppredicate(VM_Version::supports_fast_2op_lea());
25734   peepmatch (addI_rReg);
25735   peepprocedure (lea_coalesce_reg);
25736   peepreplace (leaI_rReg_rReg_peep());
25737 %}
25738 
25739 peephole
25740 %{
        // Int add (register + immediate): lea_coalesce_imm may replace the matched
        // addI_rReg_imm with a leaI_rReg_immI_peep. Enabled only when
        // supports_fast_2op_lea() holds.
25741   peeppredicate(VM_Version::supports_fast_2op_lea());
25742   peepmatch (addI_rReg_imm);
25743   peepprocedure (lea_coalesce_imm);
25744   peepreplace (leaI_rReg_immI_peep());
25745 %}
25746 
25747 peephole
25748 %{
        // Int increment: same procedure and replacement as the add-immediate rule,
        // but gated on supports_fast_3op_lea() or Intel Cascade Lake.
25749   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25750                 VM_Version::is_intel_cascade_lake());
25751   peepmatch (incI_rReg);
25752   peepprocedure (lea_coalesce_imm);
25753   peepreplace (leaI_rReg_immI_peep());
25754 %}
25755 
25756 peephole
25757 %{
        // Int decrement: same procedure and replacement as the add-immediate rule,
        // but gated on supports_fast_3op_lea() or Intel Cascade Lake.
25758   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25759                 VM_Version::is_intel_cascade_lake());
25760   peepmatch (decI_rReg);
25761   peepprocedure (lea_coalesce_imm);
25762   peepreplace (leaI_rReg_immI_peep());
25763 %}
25764 
25765 peephole
25766 %{
        // Int shift-left by an immI2 constant: lea_coalesce_imm may replace the
        // matched salI_rReg_immI2 with a leaI_rReg_immI2_peep. Enabled only when
        // supports_fast_2op_lea() holds.
25767   peeppredicate(VM_Version::supports_fast_2op_lea());
25768   peepmatch (salI_rReg_immI2);
25769   peepprocedure (lea_coalesce_imm);
25770   peepreplace (leaI_rReg_immI2_peep());
25771 %}
25772 
25773 peephole
25774 %{
        // Long add (register + register): lea_coalesce_reg may replace the matched
        // addL_rReg with a leaL_rReg_rReg_peep. Enabled only when
        // supports_fast_2op_lea() holds.
25775   peeppredicate(VM_Version::supports_fast_2op_lea());
25776   peepmatch (addL_rReg);
25777   peepprocedure (lea_coalesce_reg);
25778   peepreplace (leaL_rReg_rReg_peep());
25779 %}
25780 
25781 peephole
25782 %{
        // Long add (register + immediate): lea_coalesce_imm may replace the matched
        // addL_rReg_imm with a leaL_rReg_immL32_peep. Enabled only when
        // supports_fast_2op_lea() holds.
25783   peeppredicate(VM_Version::supports_fast_2op_lea());
25784   peepmatch (addL_rReg_imm);
25785   peepprocedure (lea_coalesce_imm);
25786   peepreplace (leaL_rReg_immL32_peep());
25787 %}
25788 
25789 peephole
25790 %{
        // Long increment: same procedure and replacement as the add-immediate rule,
        // but gated on supports_fast_3op_lea() or Intel Cascade Lake.
25791   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25792                 VM_Version::is_intel_cascade_lake());
25793   peepmatch (incL_rReg);
25794   peepprocedure (lea_coalesce_imm);
25795   peepreplace (leaL_rReg_immL32_peep());
25796 %}
25797 
25798 peephole
25799 %{
        // Long decrement: same procedure and replacement as the add-immediate rule,
        // but gated on supports_fast_3op_lea() or Intel Cascade Lake.
25800   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25801                 VM_Version::is_intel_cascade_lake());
25802   peepmatch (decL_rReg);
25803   peepprocedure (lea_coalesce_imm);
25804   peepreplace (leaL_rReg_immL32_peep());
25805 %}
25806 
25807 peephole
25808 %{
        // Long shift-left by an immI2 constant: lea_coalesce_imm may replace the
        // matched salL_rReg_immI2 with a leaL_rReg_immI2_peep. Enabled only when
        // supports_fast_2op_lea() holds.
25809   peeppredicate(VM_Version::supports_fast_2op_lea());
25810   peepmatch (salL_rReg_immI2);
25811   peepprocedure (lea_coalesce_imm);
25812   peepreplace (leaL_rReg_immI2_peep());
25813 %}
25814 
25815 peephole
25816 %{
        // Rooted at leaPCompressedOopOffset. No peeppredicate and no peepreplace are
        // given; the whole transformation is delegated to lea_remove_redundant,
        // which is defined outside this section.
25817   peepmatch (leaPCompressedOopOffset);
25818   peepprocedure (lea_remove_redundant);
25819 %}
25820 
25821 peephole
25822 %{
        // Same as above, rooted at leaP8Narrow.
25823   peepmatch (leaP8Narrow);
25824   peepprocedure (lea_remove_redundant);
25825 %}
25826 
25827 peephole
25828 %{
        // Same as above, rooted at leaP32Narrow.
25829   peepmatch (leaP32Narrow);
25830   peepprocedure (lea_remove_redundant);
25831 %}
25832 
25833 // These peephole rules match instructions which set flags and are followed by a testI/L_reg.
25834 // The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
25835 
25836 //int variant
25837 peephole
25838 %{
        // Rooted at testI_reg. No peeppredicate and no peepreplace are given; the
        // decision is delegated to test_may_remove, which is defined outside this
        // section.
25839   peepmatch (testI_reg);
25840   peepprocedure (test_may_remove);
25841 %}
25842 
25843 //long variant
25844 peephole
25845 %{
        // Rooted at testL_reg; otherwise identical to the int rule.
25846   peepmatch (testL_reg);
25847   peepprocedure (test_may_remove);
25848 %}
25849 
25850 
25851 //----------SMARTSPILL RULES---------------------------------------------------
25852 // These must follow all instruction definitions as they use the names
25853 // defined in the instructions definitions.