1 //
    2 // Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
// Each general register is described by two reg_defs: the base name and an
// "_H" entry whose VMReg is the ->next() slot of the base register's VMReg
// (presumably the upper 32 bits of the 64-bit register -- confirm against
// VMReg).  Both entries of a pair carry the same save types and encoding.
// Argument order: (register save type, C convention save type,
//                  ideal register type, encoding, VMReg).

// RAX, RCX, RDX: save-on-call for both save types.
reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

// RBX: save-on-call as register save type, save-on-entry for the C convention.
reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// RSP: no-save for both save types.
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prolog/epilog code, so it is no-save here and save-on-entry for C.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

// RSI and RDI differ only in their C-convention save type:
// save-on-entry when _WIN64 is defined, save-on-call otherwise.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

// R8-R11: save-on-call for both save types.
reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15: save-on-call as register save type, save-on-entry for the C convention.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 
  132 // Floating Point Registers
  133 
  134 // Specify priority of register selection within phases of register
  135 // allocation.  Highest priority is first.  A useful heuristic is to
  136 // give registers a low priority when they are required by machine
  137 // instructions, like EAX and EDX on I486, and choose no-save registers
  138 // before save-on-call, & save-on-call before save-on-entry.  Registers
  139 // which participate in fixed calling sequences should come last.
  140 // Registers which are used as pairs must fall on an even boundary.
  141 
// Allocation chunk holding every general register as a (reg, reg_H) pair.
// Entries are listed highest priority first; RSP is listed last.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);
  158 
  159 
  160 //----------Architecture Description Register Classes--------------------------
  161 // Several register classes are automatically defined based upon information in
  162 // this architecture description.
  163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  164 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  165 //
  166 
// Empty register class: it names no registers at all.
reg_class no_reg();
  169 
// Class for all pointer/long registers.
// Lists every general register, RSP and R15 included, as (reg, reg_H) pairs.
// adlc generates _ALL_REG_mask from this class; reg_mask_init() derives the
// dynamic pointer/long masks from it.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);
  187 
  188 // Class for all int registers
  189 reg_class all_int_reg(RAX
  190                       RDX,
  191                       RBP,
  192                       RDI,
  193                       RSI,
  194                       RCX,
  195                       RBX,
  196                       R8,
  197                       R9,
  198                       R10,
  199                       R11,
  200                       R12,
  201                       R13,
  202                       R14);
  203 
// The classes below return masks that are computed at startup by
// reg_mask_init() rather than being listed statically.

// Class for all pointer registers.
// Starts from all_reg; reg_mask_init() removes RBP when PreserveFramePointer
// is set and R12 when need_r12_heapbase() is true.  RSP and R15 stay in.
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (any_reg excluding RSP and R15)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (ptr_reg excluding RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (ptr_reg excluding RAX)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (ptr_reg excluding RAX and RBX)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers.
// reg_mask_init() copies the ptr_reg mask, so RSP and R15 are excluded.
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (long_reg excluding RAX and RDX)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (long_reg excluding RCX)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (long_reg excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers.
// Starts from all_int_reg (which lists neither RSP nor R15); reg_mask_init()
// removes RBP when PreserveFramePointer is set and R12 when
// need_r12_heapbase() is true.
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (int_reg excluding RAX and RDX)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (int_reg excluding RCX)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (int_reg excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
  268 
// Singleton classes pin an operand to one specific register.  The pointer
// and long variants list both the base and the _H entry; the int variants
// list only the base entry.

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer (RSP)
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer (R15)
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);
  313 
  314 // Singleton class for instruction pointer
  315 // reg_class ip_reg(RIP);
  316 
  317 %}
  318 
  319 //----------SOURCE BLOCK-------------------------------------------------------
  320 // This is a block of C++ code which provides values, functions, and
  321 // definitions necessary in the rest of the architecture description
  322 
  323 source_hpp %{
  324 
  325 #include "peephole_x86_64.hpp"
  326 
  327 %}
  328 
  329 // Register masks
  330 source_hpp %{
  331 
// Register masks filled in by reg_mask_init().  The masks up to
// _INT_NO_RBP_R13_REG_mask back the reg_class definitions of the same
// (lower-case) name in the register block.
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
// Selected in reg_mask_init() between the EVEX and legacy float masks.
extern RegMask _FLOAT_REG_mask;

// Register masks OR-ed with STACK_OR_STACK_SLOTS_mask() in reg_mask_init().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;
  350 
  351 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
  352 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
  353 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
  354 
  355 %}
  356 
  357 source %{
// Relocation "format" selectors.  RELOC_DISP32 is what the emit_d32_reloc()
// callers in this file pass as the format argument.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Shorthand: "__ insn(...)" expands to "_masm.insn(...)", so any function
// using it must have a MacroAssembler named _masm in scope.
#define __ _masm.
  362 
// Storage for the dynamically computed register masks; all of them are
// assigned in reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
  380 
  381 static bool need_r12_heapbase() {
  382   return UseCompressedOops;
  383 }
  384 
  385 void reg_mask_init() {
  386   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
  387   // We derive a number of subsets from it.
  388   _ANY_REG_mask = _ALL_REG_mask;
  389 
  390   if (PreserveFramePointer) {
  391     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  392     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  393   }
  394   if (need_r12_heapbase()) {
  395     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  396     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
  397   }
  398 
  399   _PTR_REG_mask = _ANY_REG_mask;
  400   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
  401   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
  402   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
  403   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
  404 
  405   _STACK_OR_PTR_REG_mask = _PTR_REG_mask;
  406   _STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  407 
  408   _PTR_REG_NO_RBP_mask = _PTR_REG_mask;
  409   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  410   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  411 
  412   _PTR_NO_RAX_REG_mask = _PTR_REG_mask;
  413   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  414   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  415 
  416   _PTR_NO_RAX_RBX_REG_mask = _PTR_NO_RAX_REG_mask;
  417   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  418   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
  419 
  420   _LONG_REG_mask = _PTR_REG_mask;
  421   _STACK_OR_LONG_REG_mask = _LONG_REG_mask;
  422   _STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  423 
  424   _LONG_NO_RAX_RDX_REG_mask = _LONG_REG_mask;
  425   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  426   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  427   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  428   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
  429 
  430   _LONG_NO_RCX_REG_mask = _LONG_REG_mask;
  431   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  432   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
  433 
  434   _LONG_NO_RBP_R13_REG_mask = _LONG_REG_mask;
  435   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  436   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  437   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  438   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
  439 
  440   _INT_REG_mask = _ALL_INT_REG_mask;
  441   if (PreserveFramePointer) {
  442     _INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  443   }
  444   if (need_r12_heapbase()) {
  445     _INT_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  446   }
  447 
  448   _STACK_OR_INT_REG_mask = _INT_REG_mask;
  449   _STACK_OR_INT_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  450 
  451   _INT_NO_RAX_RDX_REG_mask = _INT_REG_mask;
  452   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  453   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  454 
  455   _INT_NO_RCX_REG_mask = _INT_REG_mask;
  456   _INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  457 
  458   _INT_NO_RBP_R13_REG_mask = _INT_REG_mask;
  459   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  460   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  461 
  462   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  463   // from the float_reg_legacy/float_reg_evex register class.
  464   _FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
  465 }
  466 
  467 static bool generate_vzeroupper(Compile* C) {
  468   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
  469 }
  470 
  471 static int clear_avx_size() {
  472   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {
  493   if (_entry_point == nullptr) {
  494     // CallLeafNoFPInDirect
  495     return 3; // callq (register)
  496   }
  497   int offset = 13; // movq r10,#addr; callq (r10)
  498   if (this->ideal_Opcode() != Op_CallLeafVector) {
  499     offset += clear_avx_size();
  500   }
  501   return offset;
  502 }
  503 
  504 //
  505 // Compute padding required for nodes which need alignment
  506 //
  507 
  508 // The address of the call instruction needs to be 4-byte aligned to
  509 // ensure that it does not span a cache line so that it can be patched.
  510 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  511 {
  512   current_offset += clear_avx_size(); // skip vzeroupper
  513   current_offset += 1; // skip call opcode byte
  514   return align_up(current_offset, alignment_required()) - current_offset;
  515 }
  516 
  517 // The address of the call instruction needs to be 4-byte aligned to
  518 // ensure that it does not span a cache line so that it can be patched.
  519 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  520 {
  521   current_offset += clear_avx_size(); // skip vzeroupper
  522   current_offset += 11; // skip movq instruction + call opcode byte
  523   return align_up(current_offset, alignment_required()) - current_offset;
  524 }
  525 
  526 // EMIT_RM()
  527 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  528   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
  529   cbuf.insts()->emit_int8(c);
  530 }
  531 
  532 // EMIT_CC()
  533 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  534   unsigned char c = (unsigned char) (f1 | f2);
  535   cbuf.insts()->emit_int8(c);
  536 }
  537 
  538 // EMIT_OPCODE()
  539 void emit_opcode(CodeBuffer &cbuf, int code) {
  540   cbuf.insts()->emit_int8((unsigned char) code);
  541 }
  542 
  543 // EMIT_OPCODE() w/ relocation information
  544 void emit_opcode(CodeBuffer &cbuf,
  545                  int code, relocInfo::relocType reloc, int offset, int format)
  546 {
  547   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
  548   emit_opcode(cbuf, code);
  549 }
  550 
  551 // EMIT_D8()
  552 void emit_d8(CodeBuffer &cbuf, int d8) {
  553   cbuf.insts()->emit_int8((unsigned char) d8);
  554 }
  555 
  556 // EMIT_D16()
  557 void emit_d16(CodeBuffer &cbuf, int d16) {
  558   cbuf.insts()->emit_int16(d16);
  559 }
  560 
  561 // EMIT_D32()
  562 void emit_d32(CodeBuffer &cbuf, int d32) {
  563   cbuf.insts()->emit_int32(d32);
  564 }
  565 
  566 // EMIT_D64()
  567 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
  568   cbuf.insts()->emit_int64(d64);
  569 }
  570 
  571 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  572 void emit_d32_reloc(CodeBuffer& cbuf,
  573                     int d32,
  574                     relocInfo::relocType reloc,
  575                     int format)
  576 {
  577   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
  578   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  579   cbuf.insts()->emit_int32(d32);
  580 }
  581 
  582 // emit 32 bit value and construct relocation entry from RelocationHolder
  583 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
  584 #ifdef ASSERT
  585   if (rspec.reloc()->type() == relocInfo::oop_type &&
  586       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
  587     assert(Universe::heap()->is_in((address)(intptr_t)d32), "should be real oop");
  588     assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)), "cannot embed broken oops in code");
  589   }
  590 #endif
  591   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  592   cbuf.insts()->emit_int32(d32);
  593 }
  594 
  595 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
  596   address next_ip = cbuf.insts_end() + 4;
  597   emit_d32_reloc(cbuf, (int) (addr - next_ip),
  598                  external_word_Relocation::spec(addr),
  599                  RELOC_DISP32);
  600 }
  601 
  602 
  603 // emit 64 bit value and construct relocation entry from relocInfo::relocType
  604 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
  605   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  606   cbuf.insts()->emit_int64(d64);
  607 }
  608 
  609 // emit 64 bit value and construct relocation entry from RelocationHolder
  610 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
  611 #ifdef ASSERT
  612   if (rspec.reloc()->type() == relocInfo::oop_type &&
  613       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
  614     assert(Universe::heap()->is_in((address)d64), "should be real oop");
  615     assert(oopDesc::is_oop(cast_to_oop(d64)), "cannot embed broken oops in code");
  616   }
  617 #endif
  618   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  619   cbuf.insts()->emit_int64(d64);
  620 }
  621 
  622 // Access stack slot for load or store
  623 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
  624 {
  625   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
  626   if (-0x80 <= disp && disp < 0x80) {
  627     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
  628     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  629     emit_d8(cbuf, disp);     // Displacement  // R/M byte
  630   } else {
  631     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
  632     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  633     emit_d32(cbuf, disp);     // Displacement // R/M byte
  634   }
  635 }
  636 
  637    // rRegI ereg, memory mem) %{    // emit_reg_mem
  638 void encode_RegMem(CodeBuffer &cbuf,
  639                    int reg,
  640                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
  641 {
  642   assert(disp_reloc == relocInfo::none, "cannot have disp");
  643   int regenc = reg & 7;
  644   int baseenc = base & 7;
  645   int indexenc = index & 7;
  646 
  647   // There is no index & no scale, use form without SIB byte
  648   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
  649     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  650     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  651       emit_rm(cbuf, 0x0, regenc, baseenc); // *
  652     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  653       // If 8-bit displacement, mode 0x1
  654       emit_rm(cbuf, 0x1, regenc, baseenc); // *
  655       emit_d8(cbuf, disp);
  656     } else {
  657       // If 32-bit displacement
  658       if (base == -1) { // Special flag for absolute address
  659         emit_rm(cbuf, 0x0, regenc, 0x5); // *
  660         if (disp_reloc != relocInfo::none) {
  661           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  662         } else {
  663           emit_d32(cbuf, disp);
  664         }
  665       } else {
  666         // Normal base + offset
  667         emit_rm(cbuf, 0x2, regenc, baseenc); // *
  668         if (disp_reloc != relocInfo::none) {
  669           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  670         } else {
  671           emit_d32(cbuf, disp);
  672         }
  673       }
  674     }
  675   } else {
  676     // Else, encode with the SIB byte
  677     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  678     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  679       // If no displacement
  680       emit_rm(cbuf, 0x0, regenc, 0x4); // *
  681       emit_rm(cbuf, scale, indexenc, baseenc);
  682     } else {
  683       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  684         // If 8-bit displacement, mode 0x1
  685         emit_rm(cbuf, 0x1, regenc, 0x4); // *
  686         emit_rm(cbuf, scale, indexenc, baseenc);
  687         emit_d8(cbuf, disp);
  688       } else {
  689         // If 32-bit displacement
  690         if (base == 0x04 ) {
  691           emit_rm(cbuf, 0x2, regenc, 0x4);
  692           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
  693         } else {
  694           emit_rm(cbuf, 0x2, regenc, 0x4);
  695           emit_rm(cbuf, scale, indexenc, baseenc); // *
  696         }
  697         if (disp_reloc != relocInfo::none) {
  698           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  699         } else {
  700           emit_d32(cbuf, disp);
  701         }
  702       }
  703     }
  704   }
  705 }
  706 
  707 // This could be in MacroAssembler but it's fairly C2 specific
  708 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  709   Label exit;
  710   __ jccb(Assembler::noParity, exit);
  711   __ pushf();
  712   //
  713   // comiss/ucomiss instructions set ZF,PF,CF flags and
  714   // zero OF,AF,SF for NaN values.
  715   // Fixup flags by zeroing ZF,PF so that compare of NaN
  716   // values returns 'less than' result (CF is set).
  717   // Leave the rest of flags unchanged.
  718   //
  719   //    7 6 5 4 3 2 1 0
  720   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  721   //    0 0 1 0 1 0 1 1   (0x2B)
  722   //
  723   __ andq(Address(rsp, 0), 0xffffff2b);
  724   __ popf();
  725   __ bind(exit);
  726 }
  727 
  728 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  729   Label done;
  730   __ movl(dst, -1);
  731   __ jcc(Assembler::parity, done);
  732   __ jcc(Assembler::below, done);
  733   __ setb(Assembler::notEqual, dst);
  734   __ movzbl(dst, dst);
  735   __ bind(done);
  736 }
  737 
  738 // Math.min()    # Math.max()
  739 // --------------------------
  740 // ucomis[s/d]   #
  741 // ja   -> b     # a
  742 // jp   -> NaN   # NaN
  743 // jb   -> a     # b
  744 // je            #
  745 // |-jz -> a | b # a & b
  746 // |    -> a     #
  747 void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
  748                      XMMRegister a, XMMRegister b,
  749                      XMMRegister xmmt, Register rt,
  750                      bool min, bool single) {
  751 
  752   Label nan, zero, below, above, done;
  753 
  754   if (single)
  755     __ ucomiss(a, b);
  756   else
  757     __ ucomisd(a, b);
  758 
  759   if (dst->encoding() != (min ? b : a)->encoding())
  760     __ jccb(Assembler::above, above); // CF=0 & ZF=0
  761   else
  762     __ jccb(Assembler::above, done);
  763 
  764   __ jccb(Assembler::parity, nan);  // PF=1
  765   __ jccb(Assembler::below, below); // CF=1
  766 
  767   // equal
  768   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  769   if (single) {
  770     __ ucomiss(a, xmmt);
  771     __ jccb(Assembler::equal, zero);
  772 
  773     __ movflt(dst, a);
  774     __ jmp(done);
  775   }
  776   else {
  777     __ ucomisd(a, xmmt);
  778     __ jccb(Assembler::equal, zero);
  779 
  780     __ movdbl(dst, a);
  781     __ jmp(done);
  782   }
  783 
  784   __ bind(zero);
  785   if (min)
  786     __ vpor(dst, a, b, Assembler::AVX_128bit);
  787   else
  788     __ vpand(dst, a, b, Assembler::AVX_128bit);
  789 
  790   __ jmp(done);
  791 
  792   __ bind(above);
  793   if (single)
  794     __ movflt(dst, min ? b : a);
  795   else
  796     __ movdbl(dst, min ? b : a);
  797 
  798   __ jmp(done);
  799 
  800   __ bind(nan);
  801   if (single) {
  802     __ movl(rt, 0x7fc00000); // Float.NaN
  803     __ movdl(dst, rt);
  804   }
  805   else {
  806     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
  807     __ movdq(dst, rt);
  808   }
  809   __ jmp(done);
  810 
  811   __ bind(below);
  812   if (single)
  813     __ movflt(dst, min ? a : b);
  814   else
  815     __ movdbl(dst, min ? a : b);
  816 
  817   __ bind(done);
  818 }
  819 
  820 //=============================================================================
// The constant-table base node's output register mask is the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  822 
  823 int ConstantTable::calculate_table_base_offset() const {
  824   return 0;  // absolute addressing, no offset
  825 }
  826 
  827 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  828 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  829   ShouldNotReachHere();
  830 }
  831 
  832 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  833   // Empty encoding
  834 }
  835 
  836 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  837   return 0;
  838 }
  839 
  840 #ifndef PRODUCT
  841 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  842   st->print("# MachConstantBaseNode (empty encoding)");
  843 }
  844 #endif
  845 
  846 
  847 //=============================================================================
  848 #ifndef PRODUCT
  849 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  850   Compile* C = ra_->C;
  851 
  852   int framesize = C->output()->frame_size_in_bytes();
  853   int bangsize = C->output()->bang_size_in_bytes();
  854   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  855   // Remove wordSize for return addr which is already pushed.
  856   framesize -= wordSize;
  857 
  858   if (C->output()->need_stack_bang(bangsize)) {
  859     framesize -= wordSize;
  860     st->print("# stack bang (%d bytes)", bangsize);
  861     st->print("\n\t");
  862     st->print("pushq   rbp\t# Save rbp");
  863     if (PreserveFramePointer) {
  864         st->print("\n\t");
  865         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  866     }
  867     if (framesize) {
  868       st->print("\n\t");
  869       st->print("subq    rsp, #%d\t# Create frame",framesize);
  870     }
  871   } else {
  872     st->print("subq    rsp, #%d\t# Create frame",framesize);
  873     st->print("\n\t");
  874     framesize -= wordSize;
  875     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  876     if (PreserveFramePointer) {
  877       st->print("\n\t");
  878       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  879       if (framesize > 0) {
  880         st->print("\n\t");
  881         st->print("addq    rbp, #%d", framesize);
  882       }
  883     }
  884   }
  885 
  886   if (VerifyStackAtCalls) {
  887     st->print("\n\t");
  888     framesize -= wordSize;
  889     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  890 #ifdef ASSERT
  891     st->print("\n\t");
  892     st->print("# stack alignment check");
  893 #endif
  894   }
  895   if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
  896     st->print("\n\t");
  897     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  898     st->print("\n\t");
  899     st->print("je      fast_entry\t");
  900     st->print("\n\t");
  901     st->print("call    #nmethod_entry_barrier_stub\t");
  902     st->print("\n\tfast_entry:");
  903   }
  904   st->cr();
  905 }
  906 #endif
  907 
  908 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  909   Compile* C = ra_->C;
  910   C2_MacroAssembler _masm(&cbuf);
  911 
  912   __ verified_entry(C);
  913 
  914   if (ra_->C->stub_function() == nullptr) {
  915     __ entry_barrier();
  916   }
  917 
  918   if (!Compile::current()->output()->in_scratch_emit_size()) {
  919     __ bind(*_verified_entry);
  920   }
  921 
  922   C->output()->set_frame_complete(cbuf.insts_size());
  923 
  924   if (C->has_mach_constant_base_node()) {
  925     // NOTE: We set the table base offset here because users might be
  926     // emitted before MachConstantBaseNode.
  927     ConstantTable& constant_table = C->output()->constant_table();
  928     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  929   }
  930 }
  931 
  932 int MachPrologNode::reloc() const
  933 {
  934   return 0; // a large enough number
  935 }
  936 
  937 //=============================================================================
  938 #ifndef PRODUCT
  939 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  940 {
  941   Compile* C = ra_->C;
  942   if (generate_vzeroupper(C)) {
  943     st->print("vzeroupper");
  944     st->cr(); st->print("\t");
  945   }
  946 
  947   int framesize = C->output()->frame_size_in_bytes();
  948   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  949   // Remove word for return adr already pushed
  950   // and RBP
  951   framesize -= 2*wordSize;
  952 
  953   if (framesize) {
  954     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
  955     st->print("\t");
  956   }
  957 
  958   st->print_cr("popq    rbp");
  959   if (do_polling() && C->is_method_compilation()) {
  960     st->print("\t");
  961     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  962                  "ja      #safepoint_stub\t"
  963                  "# Safepoint: poll for GC");
  964   }
  965 }
  966 #endif
  967 
  968 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  969 {
  970   Compile* C = ra_->C;
  971   MacroAssembler _masm(&cbuf);
  972 
  973   if (generate_vzeroupper(C)) {
  974     // Clear upper bits of YMM registers when current compiled code uses
  975     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  976     __ vzeroupper();
  977   }
  978 
  979   // Subtract two words to account for return address and rbp
  980   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  981   __ remove_frame(initial_framesize, C->needs_stack_repair());
  982 
  983   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  984     __ reserved_stack_check();
  985   }
  986 
  987   if (do_polling() && C->is_method_compilation()) {
  988     MacroAssembler _masm(&cbuf);
  989     Label dummy_label;
  990     Label* code_stub = &dummy_label;
  991     if (!C->output()->in_scratch_emit_size()) {
  992       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  993       C->output()->add_stub(stub);
  994       code_stub = &stub->entry();
  995     }
  996     __ relocate(relocInfo::poll_return_type);
  997     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  998   }
  999 }
 1000 
 1001 int MachEpilogNode::reloc() const
 1002 {
 1003   return 2; // a large enough number
 1004 }
 1005 
 1006 const Pipeline* MachEpilogNode::pipeline() const
 1007 {
 1008   return MachNode::pipeline_class();
 1009 }
 1010 
 1011 //=============================================================================
 1012 
 1013 enum RC {
 1014   rc_bad,
 1015   rc_int,
 1016   rc_kreg,
 1017   rc_float,
 1018   rc_stack
 1019 };
 1020 
 1021 static enum RC rc_class(OptoReg::Name reg)
 1022 {
 1023   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 1024 
 1025   if (OptoReg::is_stack(reg)) return rc_stack;
 1026 
 1027   VMReg r = OptoReg::as_VMReg(reg);
 1028 
 1029   if (r->is_Register()) return rc_int;
 1030 
 1031   if (r->is_KRegister()) return rc_kreg;
 1032 
 1033   assert(r->is_XMMRegister(), "must be");
 1034   return rc_float;
 1035 }
 1036 
 1037 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 1038 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
 1039                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 1040 
 1041 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
 1042                      int stack_offset, int reg, uint ireg, outputStream* st);
 1043 
 1044 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
 1045                                       int dst_offset, uint ireg, outputStream* st) {
 1046   if (cbuf) {
 1047     MacroAssembler _masm(cbuf);
 1048     switch (ireg) {
 1049     case Op_VecS:
 1050       __ movq(Address(rsp, -8), rax);
 1051       __ movl(rax, Address(rsp, src_offset));
 1052       __ movl(Address(rsp, dst_offset), rax);
 1053       __ movq(rax, Address(rsp, -8));
 1054       break;
 1055     case Op_VecD:
 1056       __ pushq(Address(rsp, src_offset));
 1057       __ popq (Address(rsp, dst_offset));
 1058       break;
 1059     case Op_VecX:
 1060       __ pushq(Address(rsp, src_offset));
 1061       __ popq (Address(rsp, dst_offset));
 1062       __ pushq(Address(rsp, src_offset+8));
 1063       __ popq (Address(rsp, dst_offset+8));
 1064       break;
 1065     case Op_VecY:
 1066       __ vmovdqu(Address(rsp, -32), xmm0);
 1067       __ vmovdqu(xmm0, Address(rsp, src_offset));
 1068       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 1069       __ vmovdqu(xmm0, Address(rsp, -32));
 1070       break;
 1071     case Op_VecZ:
 1072       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 1073       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 1074       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 1075       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 1076       break;
 1077     default:
 1078       ShouldNotReachHere();
 1079     }
 1080 #ifndef PRODUCT
 1081   } else {
 1082     switch (ireg) {
 1083     case Op_VecS:
 1084       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 1085                 "movl    rax, [rsp + #%d]\n\t"
 1086                 "movl    [rsp + #%d], rax\n\t"
 1087                 "movq    rax, [rsp - #8]",
 1088                 src_offset, dst_offset);
 1089       break;
 1090     case Op_VecD:
 1091       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1092                 "popq    [rsp + #%d]",
 1093                 src_offset, dst_offset);
 1094       break;
 1095      case Op_VecX:
 1096       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 1097                 "popq    [rsp + #%d]\n\t"
 1098                 "pushq   [rsp + #%d]\n\t"
 1099                 "popq    [rsp + #%d]",
 1100                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 1101       break;
 1102     case Op_VecY:
 1103       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1104                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1105                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1106                 "vmovdqu xmm0, [rsp - #32]",
 1107                 src_offset, dst_offset);
 1108       break;
 1109     case Op_VecZ:
 1110       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1111                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1112                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1113                 "vmovdqu xmm0, [rsp - #64]",
 1114                 src_offset, dst_offset);
 1115       break;
 1116     default:
 1117       ShouldNotReachHere();
 1118     }
 1119 #endif
 1120   }
 1121 }
 1122 
 1123 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
 1124                                        PhaseRegAlloc* ra_,
 1125                                        bool do_size,
 1126                                        outputStream* st) const {
 1127   assert(cbuf != nullptr || st  != nullptr, "sanity");
 1128   // Get registers to move
 1129   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1130   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1131   OptoReg::Name dst_second = ra_->get_reg_second(this);
 1132   OptoReg::Name dst_first = ra_->get_reg_first(this);
 1133 
 1134   enum RC src_second_rc = rc_class(src_second);
 1135   enum RC src_first_rc = rc_class(src_first);
 1136   enum RC dst_second_rc = rc_class(dst_second);
 1137   enum RC dst_first_rc = rc_class(dst_first);
 1138 
 1139   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 1140          "must move at least 1 register" );
 1141 
 1142   if (src_first == dst_first && src_second == dst_second) {
 1143     // Self copy, no move
 1144     return 0;
 1145   }
 1146   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 1147     uint ireg = ideal_reg();
 1148     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1149     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1150     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1151       // mem -> mem
 1152       int src_offset = ra_->reg2offset(src_first);
 1153       int dst_offset = ra_->reg2offset(dst_first);
 1154       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1155     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1156       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1157     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1158       int stack_offset = ra_->reg2offset(dst_first);
 1159       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1160     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 1161       int stack_offset = ra_->reg2offset(src_first);
 1162       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1163     } else {
 1164       ShouldNotReachHere();
 1165     }
 1166     return 0;
 1167   }
 1168   if (src_first_rc == rc_stack) {
 1169     // mem ->
 1170     if (dst_first_rc == rc_stack) {
 1171       // mem -> mem
 1172       assert(src_second != dst_first, "overlap");
 1173       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1174           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1175         // 64-bit
 1176         int src_offset = ra_->reg2offset(src_first);
 1177         int dst_offset = ra_->reg2offset(dst_first);
 1178         if (cbuf) {
 1179           MacroAssembler _masm(cbuf);
 1180           __ pushq(Address(rsp, src_offset));
 1181           __ popq (Address(rsp, dst_offset));
 1182 #ifndef PRODUCT
 1183         } else {
 1184           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1185                     "popq    [rsp + #%d]",
 1186                      src_offset, dst_offset);
 1187 #endif
 1188         }
 1189       } else {
 1190         // 32-bit
 1191         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1192         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1193         // No pushl/popl, so:
 1194         int src_offset = ra_->reg2offset(src_first);
 1195         int dst_offset = ra_->reg2offset(dst_first);
 1196         if (cbuf) {
 1197           MacroAssembler _masm(cbuf);
 1198           __ movq(Address(rsp, -8), rax);
 1199           __ movl(rax, Address(rsp, src_offset));
 1200           __ movl(Address(rsp, dst_offset), rax);
 1201           __ movq(rax, Address(rsp, -8));
 1202 #ifndef PRODUCT
 1203         } else {
 1204           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 1205                     "movl    rax, [rsp + #%d]\n\t"
 1206                     "movl    [rsp + #%d], rax\n\t"
 1207                     "movq    rax, [rsp - #8]",
 1208                      src_offset, dst_offset);
 1209 #endif
 1210         }
 1211       }
 1212       return 0;
 1213     } else if (dst_first_rc == rc_int) {
 1214       // mem -> gpr
 1215       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1216           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1217         // 64-bit
 1218         int offset = ra_->reg2offset(src_first);
 1219         if (cbuf) {
 1220           MacroAssembler _masm(cbuf);
 1221           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1222 #ifndef PRODUCT
 1223         } else {
 1224           st->print("movq    %s, [rsp + #%d]\t# spill",
 1225                      Matcher::regName[dst_first],
 1226                      offset);
 1227 #endif
 1228         }
 1229       } else {
 1230         // 32-bit
 1231         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1232         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1233         int offset = ra_->reg2offset(src_first);
 1234         if (cbuf) {
 1235           MacroAssembler _masm(cbuf);
 1236           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1237 #ifndef PRODUCT
 1238         } else {
 1239           st->print("movl    %s, [rsp + #%d]\t# spill",
 1240                      Matcher::regName[dst_first],
 1241                      offset);
 1242 #endif
 1243         }
 1244       }
 1245       return 0;
 1246     } else if (dst_first_rc == rc_float) {
 1247       // mem-> xmm
 1248       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1249           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1250         // 64-bit
 1251         int offset = ra_->reg2offset(src_first);
 1252         if (cbuf) {
 1253           MacroAssembler _masm(cbuf);
 1254           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1255 #ifndef PRODUCT
 1256         } else {
 1257           st->print("%s  %s, [rsp + #%d]\t# spill",
 1258                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 1259                      Matcher::regName[dst_first],
 1260                      offset);
 1261 #endif
 1262         }
 1263       } else {
 1264         // 32-bit
 1265         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1266         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1267         int offset = ra_->reg2offset(src_first);
 1268         if (cbuf) {
 1269           MacroAssembler _masm(cbuf);
 1270           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1271 #ifndef PRODUCT
 1272         } else {
 1273           st->print("movss   %s, [rsp + #%d]\t# spill",
 1274                      Matcher::regName[dst_first],
 1275                      offset);
 1276 #endif
 1277         }
 1278       }
 1279       return 0;
 1280     } else if (dst_first_rc == rc_kreg) {
 1281       // mem -> kreg
 1282       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1283           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1284         // 64-bit
 1285         int offset = ra_->reg2offset(src_first);
 1286         if (cbuf) {
 1287           MacroAssembler _masm(cbuf);
 1288           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1289 #ifndef PRODUCT
 1290         } else {
 1291           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 1292                      Matcher::regName[dst_first],
 1293                      offset);
 1294 #endif
 1295         }
 1296       }
 1297       return 0;
 1298     }
 1299   } else if (src_first_rc == rc_int) {
 1300     // gpr ->
 1301     if (dst_first_rc == rc_stack) {
 1302       // gpr -> mem
 1303       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1304           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1305         // 64-bit
 1306         int offset = ra_->reg2offset(dst_first);
 1307         if (cbuf) {
 1308           MacroAssembler _masm(cbuf);
 1309           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 1310 #ifndef PRODUCT
 1311         } else {
 1312           st->print("movq    [rsp + #%d], %s\t# spill",
 1313                      offset,
 1314                      Matcher::regName[src_first]);
 1315 #endif
 1316         }
 1317       } else {
 1318         // 32-bit
 1319         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1320         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1321         int offset = ra_->reg2offset(dst_first);
 1322         if (cbuf) {
 1323           MacroAssembler _masm(cbuf);
 1324           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 1325 #ifndef PRODUCT
 1326         } else {
 1327           st->print("movl    [rsp + #%d], %s\t# spill",
 1328                      offset,
 1329                      Matcher::regName[src_first]);
 1330 #endif
 1331         }
 1332       }
 1333       return 0;
 1334     } else if (dst_first_rc == rc_int) {
 1335       // gpr -> gpr
 1336       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1337           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1338         // 64-bit
 1339         if (cbuf) {
 1340           MacroAssembler _masm(cbuf);
 1341           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 1342                   as_Register(Matcher::_regEncode[src_first]));
 1343 #ifndef PRODUCT
 1344         } else {
 1345           st->print("movq    %s, %s\t# spill",
 1346                      Matcher::regName[dst_first],
 1347                      Matcher::regName[src_first]);
 1348 #endif
 1349         }
 1350         return 0;
 1351       } else {
 1352         // 32-bit
 1353         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1354         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1355         if (cbuf) {
 1356           MacroAssembler _masm(cbuf);
 1357           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 1358                   as_Register(Matcher::_regEncode[src_first]));
 1359 #ifndef PRODUCT
 1360         } else {
 1361           st->print("movl    %s, %s\t# spill",
 1362                      Matcher::regName[dst_first],
 1363                      Matcher::regName[src_first]);
 1364 #endif
 1365         }
 1366         return 0;
 1367       }
 1368     } else if (dst_first_rc == rc_float) {
 1369       // gpr -> xmm
 1370       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1371           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1372         // 64-bit
 1373         if (cbuf) {
 1374           MacroAssembler _masm(cbuf);
 1375           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 1376 #ifndef PRODUCT
 1377         } else {
 1378           st->print("movdq   %s, %s\t# spill",
 1379                      Matcher::regName[dst_first],
 1380                      Matcher::regName[src_first]);
 1381 #endif
 1382         }
 1383       } else {
 1384         // 32-bit
 1385         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1386         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1387         if (cbuf) {
 1388           MacroAssembler _masm(cbuf);
 1389           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 1390 #ifndef PRODUCT
 1391         } else {
 1392           st->print("movdl   %s, %s\t# spill",
 1393                      Matcher::regName[dst_first],
 1394                      Matcher::regName[src_first]);
 1395 #endif
 1396         }
 1397       }
 1398       return 0;
 1399     } else if (dst_first_rc == rc_kreg) {
 1400       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1401           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1402         // 64-bit
 1403         if (cbuf) {
 1404           MacroAssembler _masm(cbuf);
 1405           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 1406   #ifndef PRODUCT
 1407         } else {
 1408            st->print("kmovq   %s, %s\t# spill",
 1409                        Matcher::regName[dst_first],
 1410                        Matcher::regName[src_first]);
 1411   #endif
 1412         }
 1413       }
 1414       Unimplemented();
 1415       return 0;
 1416     }
 1417   } else if (src_first_rc == rc_float) {
 1418     // xmm ->
 1419     if (dst_first_rc == rc_stack) {
 1420       // xmm -> mem
 1421       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1422           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1423         // 64-bit
 1424         int offset = ra_->reg2offset(dst_first);
 1425         if (cbuf) {
 1426           MacroAssembler _masm(cbuf);
 1427           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 1428 #ifndef PRODUCT
 1429         } else {
 1430           st->print("movsd   [rsp + #%d], %s\t# spill",
 1431                      offset,
 1432                      Matcher::regName[src_first]);
 1433 #endif
 1434         }
 1435       } else {
 1436         // 32-bit
 1437         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1438         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1439         int offset = ra_->reg2offset(dst_first);
 1440         if (cbuf) {
 1441           MacroAssembler _masm(cbuf);
 1442           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 1443 #ifndef PRODUCT
 1444         } else {
 1445           st->print("movss   [rsp + #%d], %s\t# spill",
 1446                      offset,
 1447                      Matcher::regName[src_first]);
 1448 #endif
 1449         }
 1450       }
 1451       return 0;
 1452     } else if (dst_first_rc == rc_int) {
 1453       // xmm -> gpr
 1454       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1455           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1456         // 64-bit
 1457         if (cbuf) {
 1458           MacroAssembler _masm(cbuf);
 1459           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 1460 #ifndef PRODUCT
 1461         } else {
 1462           st->print("movdq   %s, %s\t# spill",
 1463                      Matcher::regName[dst_first],
 1464                      Matcher::regName[src_first]);
 1465 #endif
 1466         }
 1467       } else {
 1468         // 32-bit
 1469         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1470         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1471         if (cbuf) {
 1472           MacroAssembler _masm(cbuf);
 1473           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 1474 #ifndef PRODUCT
 1475         } else {
 1476           st->print("movdl   %s, %s\t# spill",
 1477                      Matcher::regName[dst_first],
 1478                      Matcher::regName[src_first]);
 1479 #endif
 1480         }
 1481       }
 1482       return 0;
 1483     } else if (dst_first_rc == rc_float) {
 1484       // xmm -> xmm
 1485       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1486           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1487         // 64-bit
 1488         if (cbuf) {
 1489           MacroAssembler _masm(cbuf);
 1490           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 1491 #ifndef PRODUCT
 1492         } else {
 1493           st->print("%s  %s, %s\t# spill",
 1494                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 1495                      Matcher::regName[dst_first],
 1496                      Matcher::regName[src_first]);
 1497 #endif
 1498         }
 1499       } else {
 1500         // 32-bit
 1501         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1502         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1503         if (cbuf) {
 1504           MacroAssembler _masm(cbuf);
 1505           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 1506 #ifndef PRODUCT
 1507         } else {
 1508           st->print("%s  %s, %s\t# spill",
 1509                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 1510                      Matcher::regName[dst_first],
 1511                      Matcher::regName[src_first]);
 1512 #endif
 1513         }
 1514       }
 1515       return 0;
 1516     } else if (dst_first_rc == rc_kreg) {
 1517       assert(false, "Illegal spilling");
 1518       return 0;
 1519     }
 1520   } else if (src_first_rc == rc_kreg) {
 1521     if (dst_first_rc == rc_stack) {
 1522       // mem -> kreg
 1523       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1524           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1525         // 64-bit
 1526         int offset = ra_->reg2offset(dst_first);
 1527         if (cbuf) {
 1528           MacroAssembler _masm(cbuf);
 1529           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1530 #ifndef PRODUCT
 1531         } else {
 1532           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 1533                      offset,
 1534                      Matcher::regName[src_first]);
 1535 #endif
 1536         }
 1537       }
 1538       return 0;
 1539     } else if (dst_first_rc == rc_int) {
 1540       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1541           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1542         // 64-bit
 1543         if (cbuf) {
 1544           MacroAssembler _masm(cbuf);
 1545           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1546 #ifndef PRODUCT
 1547         } else {
 1548          st->print("kmovq   %s, %s\t# spill",
 1549                      Matcher::regName[dst_first],
 1550                      Matcher::regName[src_first]);
 1551 #endif
 1552         }
 1553       }
 1554       Unimplemented();
 1555       return 0;
 1556     } else if (dst_first_rc == rc_kreg) {
 1557       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1558           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1559         // 64-bit
 1560         if (cbuf) {
 1561           MacroAssembler _masm(cbuf);
 1562           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1563 #ifndef PRODUCT
 1564         } else {
 1565          st->print("kmovq   %s, %s\t# spill",
 1566                      Matcher::regName[dst_first],
 1567                      Matcher::regName[src_first]);
 1568 #endif
 1569         }
 1570       }
 1571       return 0;
 1572     } else if (dst_first_rc == rc_float) {
 1573       assert(false, "Illegal spill");
 1574       return 0;
 1575     }
 1576   }
 1577 
 1578   assert(0," foo ");
 1579   Unimplemented();
 1580   return 0;
 1581 }
 1582 
 1583 #ifndef PRODUCT
 1584 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1585   implementation(nullptr, ra_, false, st);
 1586 }
 1587 #endif
 1588 
 1589 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1590   implementation(&cbuf, ra_, false, nullptr);
 1591 }
 1592 
 1593 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1594   return MachNode::size(ra_);
 1595 }
 1596 
 1597 //=============================================================================
 1598 #ifndef PRODUCT
 1599 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1600 {
 1601   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1602   int reg = ra_->get_reg_first(this);
 1603   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1604             Matcher::regName[reg], offset);
 1605 }
 1606 #endif
 1607 
 1608 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1609 {
 1610   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1611   int reg = ra_->get_encode(this);
 1612   if (offset >= 0x80) {
 1613     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1614     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1615     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1616     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1617     emit_d32(cbuf, offset);
 1618   } else {
 1619     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1620     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1621     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1622     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1623     emit_d8(cbuf, offset);
 1624   }
 1625 }
 1626 
 1627 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1628 {
 1629   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1630   return (offset < 0x80) ? 5 : 8; // REX
 1631 }
 1632 
 1633 //=============================================================================
 1634 #ifndef PRODUCT
void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  // Debug listing only: prints the node name rather than the emitted code.
  st->print_cr("MachVEPNode");
}
 1639 #endif
 1640 
void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // Emits one of two entry sequences, selected by _verified:
  //  - not verified: klass check against rax with a jump to the IC miss stub
  //    on mismatch;
  //  - verified: unpack inline type arguments, emit the verified entry and
  //    jump to the real verified entry point.
  // Either way the code emitted by this node is padded with nops to a
  // multiple of 4 bytes.
  C2_MacroAssembler _masm(&cbuf);
  // Buffer size before this node; used below to measure what was emitted.
  uint insts_size = cbuf.insts_size();
  if (!_verified) {
    if (UseCompressedClassPointers) {
      __ load_klass(rscratch1, j_rarg0, rscratch2);
      __ cmpptr(rax, rscratch1);
    } else {
      __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
    }
    __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Set up a frame, run the entry barrier, then tear the frame down again.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // During scratch emission a local placeholder label is used as the
      // jump target instead of *_verified_entry.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
 1681 
 1682 //=============================================================================
 1683 #ifndef PRODUCT
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  // Prints a textual rendering of the unverified entry point: the inline
  // cache check (compressed or uncompressed klass form), the jump to the
  // IC miss stub, and a single line standing in for the alignment nops.
  if (UseCompressedClassPointers) {
    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
  } else {
    st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
                 "# Inline cache check");
  }
  st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
  st->print_cr("\tnop\t# nops to align entry point");
}
 1697 #endif
 1698 
 1699 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1700 {
 1701   MacroAssembler masm(&cbuf);
 1702   uint insts_size = cbuf.insts_size();
 1703   if (UseCompressedClassPointers) {
 1704     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1705     masm.cmpptr(rax, rscratch1);
 1706   } else {
 1707     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1708   }
 1709 
 1710   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1711 
 1712   /* WARNING these NOPs are critical so that verified entry point is properly
 1713      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1714   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1715   if (OptoBreakpoint) {
 1716     // Leave space for int3
 1717     nops_cnt -= 1;
 1718   }
 1719   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1720   if (nops_cnt > 0)
 1721     masm.nop(nops_cnt);
 1722 }
 1723 
 1724 //=============================================================================
 1725 
 1726 bool Matcher::supports_vector_calling_convention(void) {
 1727   if (EnableVectorSupport && UseVectorStubs) {
 1728     return true;
 1729   }
 1730   return false;
 1731 }
 1732 
 1733 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1734   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1735   int lo = XMM0_num;
 1736   int hi = XMM0b_num;
 1737   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1738   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1739   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1740   return OptoRegPair(hi, lo);
 1741 }
 1742 
 1743 // Is this branch offset short enough that a short branch can be used?
 1744 //
 1745 // NOTE: If the platform does not provide any short branch variants, then
 1746 //       this method should return false for offset 0.
 1747 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1748   // The passed offset is relative to address of the branch.
 1749   // On 86 a branch displacement is calculated relative to address
 1750   // of a next instruction.
 1751   offset -= br_size;
 1752 
 1753   // the short version of jmpConUCF2 contains multiple branches,
 1754   // making the reach slightly less
 1755   if (rule == jmpConUCF2_rule)
 1756     return (-126 <= offset && offset <= 125);
 1757   return (-128 <= offset && offset <= 127);
 1758 }
 1759 
 1760 // Return whether or not this register is ever used as an argument.
 1761 // This function is used on startup to build the trampoline stubs in
 1762 // generateOptoStub.  Registers not mentioned will be killed by the VM
 1763 // call in the trampoline, and arguments in those registers not be
 1764 // available to the callee.
 1765 bool Matcher::can_be_java_arg(int reg)
 1766 {
 1767   return
 1768     reg ==  RDI_num || reg == RDI_H_num ||
 1769     reg ==  RSI_num || reg == RSI_H_num ||
 1770     reg ==  RDX_num || reg == RDX_H_num ||
 1771     reg ==  RCX_num || reg == RCX_H_num ||
 1772     reg ==   R8_num || reg ==  R8_H_num ||
 1773     reg ==   R9_num || reg ==  R9_H_num ||
 1774     reg ==  R12_num || reg == R12_H_num ||
 1775     reg == XMM0_num || reg == XMM0b_num ||
 1776     reg == XMM1_num || reg == XMM1b_num ||
 1777     reg == XMM2_num || reg == XMM2b_num ||
 1778     reg == XMM3_num || reg == XMM3b_num ||
 1779     reg == XMM4_num || reg == XMM4b_num ||
 1780     reg == XMM5_num || reg == XMM5b_num ||
 1781     reg == XMM6_num || reg == XMM6b_num ||
 1782     reg == XMM7_num || reg == XMM7b_num;
 1783 }
 1784 
bool Matcher::is_spillable_arg(int reg)
{
  // Same register set as can_be_java_arg().
  return can_be_java_arg(reg);
}
 1789 
 1790 uint Matcher::int_pressure_limit()
 1791 {
 1792   return (INTPRESSURE == -1) ? _INT_REG_mask.Size() : INTPRESSURE;
 1793 }
 1794 
 1795 uint Matcher::float_pressure_limit()
 1796 {
 1797   // After experiment around with different values, the following default threshold
 1798   // works best for LCM's register pressure scheduling on x64.
 1799   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 1800   uint default_float_pressure_threshold = _FLOAT_REG_mask.Size() - dec_count;
 1801   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 1802 }
 1803 
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply (MulHiL) when the
  // divisor is a constant is faster than the hardware DIV
  // instruction, so this always answers false regardless of 'divisor'.
  return false;
}
 1810 
 1811 // Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  // The int quotient projection uses the INT_RAX_REG mask.
  return INT_RAX_REG_mask();
}
 1815 
 1816 // Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  // The int remainder projection uses the INT_RDX_REG mask.
  return INT_RDX_REG_mask();
}
 1820 
 1821 // Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  // The long quotient projection uses the LONG_RAX_REG mask.
  return LONG_RAX_REG_mask();
}
 1825 
 1826 // Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  // The long remainder projection uses the LONG_RDX_REG mask.
  return LONG_RDX_REG_mask();
}
 1830 
 1831 // Register for saving SP into on method handle invokes. Not used on x86_64.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
    // No SP-save register is used on this platform: hand back the NO_REG mask.
    return NO_REG_mask();
}
 1835 
 1836 %}
 1837 
 1838 //----------ENCODING BLOCK-----------------------------------------------------
 1839 // This block specifies the encoding classes used by the compiler to
 1840 // output byte streams.  Encoding classes are parameterized macros
 1841 // used by Machine Instruction Nodes in order to generate the bit
 1842 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 1846 // which returns its register number when queried.  CONST_INTER causes
 1847 // an operand to generate a function which returns the value of the
 1848 // constant when queried.  MEMORY_INTER causes an operand to generate
 1849 // four functions which return the Base Register, the Index Register,
 1850 // the Scale Value, and the Offset Value of the operand when queried.
 1851 // COND_INTER causes an operand to generate six functions which return
 1852 // the encoding code (ie - encoding bits for the instruction)
 1853 // associated with each basic boolean condition for a conditional
 1854 // instruction.
 1855 //
 1856 // Instructions specify two basic values for encoding.  Again, a
 1857 // function is available to check if the constant displacement is an
 1858 // oop. They use the ins_encode keyword to specify their encoding
 1859 // classes (which must be a sequence of enc_class names, and their
 1860 // parameters, specified in the encoding block), and they use the
 1861 // opcode keyword to specify, in order, their primary, secondary, and
 1862 // tertiary opcode.  Only the opcode sections which a particular
 1863 // instruction needs for encoding need to be specified.
 1864 encode %{
 1865   // Build emit functions for each basic byte or larger field in the
 1866   // intel encoding scheme (opcode, rm, sib, immediate), and call them
 1867   // from C++ code in the enc_class source block.  Emit functions will
 1868   // live in the main source block for now.  In future, we can
 1869   // generalize this by adding a syntax that specifies the sizes of
 1870   // fields in an order, so that the adlc can build the emit functions
 1871   // automagically
 1872 
 1873   // Emit primary opcode
  enc_class OpcP
  %{
    // One opcode byte taken from the instruction's primary opcode.
    emit_opcode(cbuf, $primary);
  %}
 1878 
 1879   // Emit secondary opcode
  enc_class OpcS
  %{
    // One opcode byte taken from the instruction's secondary opcode.
    emit_opcode(cbuf, $secondary);
  %}
 1884 
 1885   // Emit tertiary opcode
  enc_class OpcT
  %{
    // One opcode byte taken from the instruction's tertiary opcode.
    emit_opcode(cbuf, $tertiary);
  %}
 1890 
 1891   // Emit opcode directly
  enc_class Opcode(immI d8)
  %{
    // The opcode byte is supplied directly as the constant operand 'd8'.
    emit_opcode(cbuf, $d8$$constant);
  %}
 1896 
 1897   // Emit size prefix
  enc_class SizePrefix
  %{
    // 0x66 is the operand-size override prefix.
    emit_opcode(cbuf, 0x66);
  %}
 1902 
  enc_class reg(rRegI reg)
  %{
    // ModRM byte with mod=11, a zero reg field, and the low three bits of
    // the register encoding in r/m (no prefix is emitted here).
    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
  %}
 1907 
  enc_class reg_reg(rRegI dst, rRegI src)
  %{
    // Register-direct ModRM byte (mod=11): dst goes in the reg field and src
    // in r/m, each reduced to its low three bits.
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
 1912 
  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
  %{
    // The given opcode byte followed by a register-direct ModRM byte
    // (dst in the reg field, src in r/m, low three bits of each).
    emit_opcode(cbuf, $opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}
 1918 
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:
    //
    // In the special case the idiv is skipped entirely: rax still holds
    // min_int and rdx has already been cleared.
    MacroAssembler _masm(&cbuf);
    Label normal;
    Label done;

    // cmp    $0x80000000,%eax
    __ cmpl(as_Register(RAX_enc), 0x80000000);

    // jne    e <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpl($div$$Register, -1);

    // je     11 <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cltd
    __ bind(normal);
    __ cdql();

    // idivl
    // <done>
    __ idivl($div$$Register);
    __ bind(done);
  %}
 1976 
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:
    //
    // rdx is used as scratch for the min_long comparison before it is
    // either cleared (special case) or overwritten by cqto/idiv.
    MacroAssembler _masm(&cbuf);
    Label normal;
    Label done;

    // mov    $0x8000000000000000,%rdx
    __ mov64(as_Register(RDX_enc), 0x8000000000000000);

    // cmp    %rdx,%rax
    __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));

    // jne    17 <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpq($div$$Register, -1);

    // je     1c <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cqto
    __ bind(normal);
    __ cdqq();

    // idivq (emitted here, as part of this encoding)
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}
 2035 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
 2037   enc_class OpcSE(immI imm)
 2038   %{
 2039     // Emit primary opcode and set sign-extend bit
 2040     // Check for 8-bit immediate, and set sign extend bit in opcode
 2041     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2042       emit_opcode(cbuf, $primary | 0x02);
 2043     } else {
 2044       // 32-bit immediate
 2045       emit_opcode(cbuf, $primary);
 2046     }
 2047   %}
 2048 
 2049   enc_class OpcSErm(rRegI dst, immI imm)
 2050   %{
 2051     // OpcSEr/m
 2052     int dstenc = $dst$$reg;
 2053     if (dstenc >= 8) {
 2054       emit_opcode(cbuf, Assembler::REX_B);
 2055       dstenc -= 8;
 2056     }
 2057     // Emit primary opcode and set sign-extend bit
 2058     // Check for 8-bit immediate, and set sign extend bit in opcode
 2059     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2060       emit_opcode(cbuf, $primary | 0x02);
 2061     } else {
 2062       // 32-bit immediate
 2063       emit_opcode(cbuf, $primary);
 2064     }
 2065     // Emit r/m byte with secondary opcode, after primary opcode.
 2066     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2067   %}
 2068 
 2069   enc_class OpcSErm_wide(rRegL dst, immI imm)
 2070   %{
 2071     // OpcSEr/m
 2072     int dstenc = $dst$$reg;
 2073     if (dstenc < 8) {
 2074       emit_opcode(cbuf, Assembler::REX_W);
 2075     } else {
 2076       emit_opcode(cbuf, Assembler::REX_WB);
 2077       dstenc -= 8;
 2078     }
 2079     // Emit primary opcode and set sign-extend bit
 2080     // Check for 8-bit immediate, and set sign extend bit in opcode
 2081     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2082       emit_opcode(cbuf, $primary | 0x02);
 2083     } else {
 2084       // 32-bit immediate
 2085       emit_opcode(cbuf, $primary);
 2086     }
 2087     // Emit r/m byte with secondary opcode, after primary opcode.
 2088     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2089   %}
 2090 
  enc_class Con8or32(immI imm)
  %{
    // Emit the immediate itself: one byte when it fits in a signed byte,
    // four bytes otherwise. The range test is the same one the OpcSE*
    // classes in this file use to pick the opcode form.
    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
      $$$emit8$imm$$constant;
    } else {
      // 32-bit immediate
      $$$emit32$imm$$constant;
    }
  %}
 2101 
  enc_class opc2_reg(rRegI dst)
  %{
    // BSWAP
    // Combines the secondary opcode with the register encoding via emit_cc.
    emit_cc(cbuf, $secondary, $dst$$reg);
  %}
 2107 
  enc_class opc3_reg(rRegI dst)
  %{
    // BSWAP
    // Combines the tertiary opcode with the register encoding via emit_cc.
    emit_cc(cbuf, $tertiary, $dst$$reg);
  %}
 2113 
  enc_class reg_opc(rRegI div)
  %{
    // INC, DEC, IDIV, IMOD, JMP indirect, ...
    // Register-direct ModRM byte with the secondary opcode in the reg field
    // and the low three bits of the operand register in r/m.
    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
  %}
 2119 
  enc_class enc_cmov(cmpOp cop)
  %{
    // CMOV
    // Primary opcode byte first, then the secondary opcode combined with the
    // condition code of 'cop'.
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}
 2126 
 2127   enc_class enc_PartialSubtypeCheck()
 2128   %{
 2129     Register Rrdi = as_Register(RDI_enc); // result register
 2130     Register Rrax = as_Register(RAX_enc); // super class
 2131     Register Rrcx = as_Register(RCX_enc); // killed
 2132     Register Rrsi = as_Register(RSI_enc); // sub class
 2133     Label miss;
 2134     const bool set_cond_codes = true;
 2135 
 2136     MacroAssembler _masm(&cbuf);
 2137     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 2138                                      nullptr, &miss,
 2139                                      /*set_cond_codes:*/ true);
 2140     if ($primary) {
 2141       __ xorptr(Rrdi, Rrdi);
 2142     }
 2143     __ bind(miss);
 2144   %}
 2145 
  enc_class clear_avx %{
    // Conditionally emits vzeroupper; in debug builds the emitted size is
    // checked against clear_avx_size().
    debug_only(int off0 = cbuf.insts_size());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}
 2158 
  enc_class Java_To_Runtime(method meth) %{
    // Call through a register: the 64-bit target address is loaded into r10
    // and called indirectly, followed by the post-call nop.
    // No relocation needed
    MacroAssembler _masm(&cbuf);
    __ mov64(r10, (int64_t) $meth$$method);
    __ call(r10);
    __ post_call_nop();
  %}
 2166 
  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    //
    // Three cases, selected by _method:
    //   - no method attached: call with a runtime-call relocation;
    //   - the _ensureMaterializedForStackWalk intrinsic: call elided, replaced
    //     by a 5-byte nop;
    //   - otherwise: call with a static or optimized-virtual relocation plus
    //     a to-interpreter stub (shared where possible).
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();

    if (!_method) {
      $$$emit8$primary;
      // 32-bit displacement relative to the end of the displacement field
      // (hence the extra -4 after subtracting the current end of code).
      emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_DISP32);
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      $$$emit8$primary;
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      address mark = cbuf.insts_mark();
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
      } else {
        // Emit stubs for static call.
        address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
        if (stub == nullptr) {
          // Stub emission failed: record the failure and stop emitting here.
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
    _masm.clear_inst_mark();
    __ post_call_nop();
  %}
 2209 
  enc_class Java_Dynamic_Call(method meth) %{
    // Inline-cache call to the target, tagged with the resolved method index,
    // followed by the post-call nop.
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
    __ post_call_nop();
  %}
 2215 
  enc_class reg_opc_imm(rRegI dst, immI8 shift)
  %{
    // SAL, SAR, SHR
    // Layout: [REX.B] primary-opcode ModRM(mod=11, reg=secondary, r/m=dst) imm8
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      // High register: REX.B prefix, then keep only the low three bits.
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    }
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, dstenc);
    $$$emit8$shift$$constant;
  %}
 2228 
  enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
  %{
    // SAL, SAR, SHR
    // Layout: REX.W[B] primary-opcode ModRM(mod=11, reg=secondary, r/m=dst) imm8
    int dstenc = $dst$$reg;
    if (dstenc < 8) {
      emit_opcode(cbuf, Assembler::REX_W);
    } else {
      // High register: add the B bit and keep only the low three bits.
      emit_opcode(cbuf, Assembler::REX_WB);
      dstenc -= 8;
    }
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, dstenc);
    $$$emit8$shift$$constant;
  %}
 2243 
  enc_class load_immI(rRegI dst, immI src)
  %{
    // Layout: [REX.B] (0xB8 | low three bits of dst) imm32
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    }
    emit_opcode(cbuf, 0xB8 | dstenc);
    $$$emit32$src$$constant;
  %}
 2254 
 2255   enc_class load_immL(rRegL dst, immL src)
 2256   %{
 2257     int dstenc = $dst$$reg;
 2258     if (dstenc < 8) {
 2259       emit_opcode(cbuf, Assembler::REX_W);
 2260     } else {
 2261       emit_opcode(cbuf, Assembler::REX_WB);
 2262       dstenc -= 8;
 2263     }
 2264     emit_opcode(cbuf, 0xB8 | dstenc);
 2265     emit_d64(cbuf, $src$$constant);
 2266   %}
 2267 
  enc_class load_immUL32(rRegL dst, immUL32 src)
  %{
    // same as load_immI, but this time we care about zeroes in the high word
    // Layout: [REX.B] (0xB8 | low three bits of dst) imm32 -- no REX.W.
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    }
    emit_opcode(cbuf, 0xB8 | dstenc);
    $$$emit32$src$$constant;
  %}
 2279 
  enc_class load_immL32(rRegL dst, immL32 src)
  %{
    // Layout: REX.W[B] 0xC7 ModRM(mod=11, reg=0, r/m=dst) imm32
    int dstenc = $dst$$reg;
    if (dstenc < 8) {
      emit_opcode(cbuf, Assembler::REX_W);
    } else {
      emit_opcode(cbuf, Assembler::REX_WB);
      dstenc -= 8;
    }
    emit_opcode(cbuf, 0xC7);
    emit_rm(cbuf, 0x03, 0x00, dstenc);
    $$$emit32$src$$constant;
  %}
 2293 
  enc_class load_immP31(rRegP dst, immP32 src)
  %{
    // same as load_immI, but this time we care about zeroes in the high word
    // Layout: [REX.B] (0xB8 | low three bits of dst) imm32 -- no REX.W.
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    }
    emit_opcode(cbuf, 0xB8 | dstenc);
    $$$emit32$src$$constant;
  %}
 2305 
 2306   enc_class load_immP(rRegP dst, immP src)
 2307   %{
 2308     int dstenc = $dst$$reg;
 2309     if (dstenc < 8) {
 2310       emit_opcode(cbuf, Assembler::REX_W);
 2311     } else {
 2312       emit_opcode(cbuf, Assembler::REX_WB);
 2313       dstenc -= 8;
 2314     }
 2315     emit_opcode(cbuf, 0xB8 | dstenc);
 2316     // This next line should be generated from ADLC
 2317     if ($src->constant_reloc() != relocInfo::none) {
 2318       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
 2319     } else {
 2320       emit_d64(cbuf, $src$$constant);
 2321     }
 2322   %}
 2323 
  enc_class Con32(immI src)
  %{
    // Output immediate (as a 32-bit value, via the ADLC emit32 form)
    $$$emit32$src$$constant;
  %}
 2329 
  enc_class Con32F_as_bits(immF src)
  %{
    // Output Float immediate bits: the float constant is reinterpreted as a
    // jint via jint_cast and emitted as a 32-bit value.
    jfloat jf = $src$$constant;
    jint jf_as_bits = jint_cast(jf);
    emit_d32(cbuf, jf_as_bits);
  %}
 2337 
  enc_class Con16(immI src)
  %{
    // Output immediate (as a 16-bit value, via the ADLC emit16 form)
    $$$emit16$src$$constant;
  %}
 2343 
 2344   // How is this different from Con32??? XXX
  enc_class Con_d32(immI src)
  %{
    // Emits the constant as a 32-bit value by calling emit_d32 directly.
    emit_d32(cbuf,$src$$constant);
  %}
 2349 
  enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference:
    // ModRM with mod=00 and r/m=101, followed by a zero 32-bit displacement.
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}
 2355 
  enc_class lock_prefix()
  %{
    // Single prefix byte; the instruction it applies to is emitted separately.
    emit_opcode(cbuf, 0xF0); // lock
  %}
 2360 
 2361   enc_class REX_mem(memory mem)
 2362   %{
 2363     if ($mem$$base >= 8) {
 2364       if ($mem$$index < 8) {
 2365         emit_opcode(cbuf, Assembler::REX_B);
 2366       } else {
 2367         emit_opcode(cbuf, Assembler::REX_XB);
 2368       }
 2369     } else {
 2370       if ($mem$$index >= 8) {
 2371         emit_opcode(cbuf, Assembler::REX_X);
 2372       }
 2373     }
 2374   %}
 2375 
 2376   enc_class REX_mem_wide(memory mem)
 2377   %{
 2378     if ($mem$$base >= 8) {
 2379       if ($mem$$index < 8) {
 2380         emit_opcode(cbuf, Assembler::REX_WB);
 2381       } else {
 2382         emit_opcode(cbuf, Assembler::REX_WXB);
 2383       }
 2384     } else {
 2385       if ($mem$$index < 8) {
 2386         emit_opcode(cbuf, Assembler::REX_W);
 2387       } else {
 2388         emit_opcode(cbuf, Assembler::REX_WX);
 2389       }
 2390     }
 2391   %}
 2392 
 2393   // for byte regs
 2394   enc_class REX_breg(rRegI reg)
 2395   %{
 2396     if ($reg$$reg >= 4) {
 2397       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2398     }
 2399   %}
 2400 
 2401   // for byte regs
 2402   enc_class REX_reg_breg(rRegI dst, rRegI src)
 2403   %{
 2404     if ($dst$$reg < 8) {
 2405       if ($src$$reg >= 4) {
 2406         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2407       }
 2408     } else {
 2409       if ($src$$reg < 8) {
 2410         emit_opcode(cbuf, Assembler::REX_R);
 2411       } else {
 2412         emit_opcode(cbuf, Assembler::REX_RB);
 2413       }
 2414     }
 2415   %}
 2416 
 2417   // for byte regs
 2418   enc_class REX_breg_mem(rRegI reg, memory mem)
 2419   %{
 2420     if ($reg$$reg < 8) {
 2421       if ($mem$$base < 8) {
 2422         if ($mem$$index >= 8) {
 2423           emit_opcode(cbuf, Assembler::REX_X);
 2424         } else if ($reg$$reg >= 4) {
 2425           emit_opcode(cbuf, Assembler::REX);
 2426         }
 2427       } else {
 2428         if ($mem$$index < 8) {
 2429           emit_opcode(cbuf, Assembler::REX_B);
 2430         } else {
 2431           emit_opcode(cbuf, Assembler::REX_XB);
 2432         }
 2433       }
 2434     } else {
 2435       if ($mem$$base < 8) {
 2436         if ($mem$$index < 8) {
 2437           emit_opcode(cbuf, Assembler::REX_R);
 2438         } else {
 2439           emit_opcode(cbuf, Assembler::REX_RX);
 2440         }
 2441       } else {
 2442         if ($mem$$index < 8) {
 2443           emit_opcode(cbuf, Assembler::REX_RB);
 2444         } else {
 2445           emit_opcode(cbuf, Assembler::REX_RXB);
 2446         }
 2447       }
 2448     }
 2449   %}
 2450 
  enc_class REX_reg(rRegI reg)
  %{
    // REX.B prefix only for a high register (encoding 8 and above);
    // nothing is emitted for a low register.
    if ($reg$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
  %}
 2457 
 2458   enc_class REX_reg_wide(rRegI reg)
 2459   %{
 2460     if ($reg$$reg < 8) {
 2461       emit_opcode(cbuf, Assembler::REX_W);
 2462     } else {
 2463       emit_opcode(cbuf, Assembler::REX_WB);
 2464     }
 2465   %}
 2466 
 2467   enc_class REX_reg_reg(rRegI dst, rRegI src)
 2468   %{
 2469     if ($dst$$reg < 8) {
 2470       if ($src$$reg >= 8) {
 2471         emit_opcode(cbuf, Assembler::REX_B);
 2472       }
 2473     } else {
 2474       if ($src$$reg < 8) {
 2475         emit_opcode(cbuf, Assembler::REX_R);
 2476       } else {
 2477         emit_opcode(cbuf, Assembler::REX_RB);
 2478       }
 2479     }
 2480   %}
 2481 
 2482   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
 2483   %{
 2484     if ($dst$$reg < 8) {
 2485       if ($src$$reg < 8) {
 2486         emit_opcode(cbuf, Assembler::REX_W);
 2487       } else {
 2488         emit_opcode(cbuf, Assembler::REX_WB);
 2489       }
 2490     } else {
 2491       if ($src$$reg < 8) {
 2492         emit_opcode(cbuf, Assembler::REX_WR);
 2493       } else {
 2494         emit_opcode(cbuf, Assembler::REX_WRB);
 2495       }
 2496     }
 2497   %}
 2498 
 2499   enc_class REX_reg_mem(rRegI reg, memory mem)
 2500   %{
          // Selects the prefix for a register + memory operand pair.  An
          // encoding >= 8 contributes R for $reg, X for the index of $mem and
          // B for the base of $mem.  Nothing is emitted when all three are
          // below 8.
 2501     if ($reg$$reg < 8) {
 2502       if ($mem$$base < 8) {
 2503         if ($mem$$index >= 8) {
 2504           emit_opcode(cbuf, Assembler::REX_X);
 2505         }
 2506       } else {
 2507         if ($mem$$index < 8) {
 2508           emit_opcode(cbuf, Assembler::REX_B);
 2509         } else {
 2510           emit_opcode(cbuf, Assembler::REX_XB);
 2511         }
 2512       }
 2513     } else {
 2514       if ($mem$$base < 8) {
 2515         if ($mem$$index < 8) {
 2516           emit_opcode(cbuf, Assembler::REX_R);
 2517         } else {
 2518           emit_opcode(cbuf, Assembler::REX_RX);
 2519         }
 2520       } else {
 2521         if ($mem$$index < 8) {
 2522           emit_opcode(cbuf, Assembler::REX_RB);
 2523         } else {
 2524           emit_opcode(cbuf, Assembler::REX_RXB);
 2525         }
 2526       }
 2527     }
 2528   %}
 2529 
 2530   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
 2531   %{
          // Always emits a W-flavoured prefix for a register + memory operand
          // pair.  An encoding >= 8 adds R for $reg, X for the index of $mem
          // and B for the base of $mem; plain REX_W is used when all three
          // are below 8.
 2532     if ($reg$$reg < 8) {
 2533       if ($mem$$base < 8) {
 2534         if ($mem$$index < 8) {
 2535           emit_opcode(cbuf, Assembler::REX_W);
 2536         } else {
 2537           emit_opcode(cbuf, Assembler::REX_WX);
 2538         }
 2539       } else {
 2540         if ($mem$$index < 8) {
 2541           emit_opcode(cbuf, Assembler::REX_WB);
 2542         } else {
 2543           emit_opcode(cbuf, Assembler::REX_WXB);
 2544         }
 2545       }
 2546     } else {
 2547       if ($mem$$base < 8) {
 2548         if ($mem$$index < 8) {
 2549           emit_opcode(cbuf, Assembler::REX_WR);
 2550         } else {
 2551           emit_opcode(cbuf, Assembler::REX_WRX);
 2552         }
 2553       } else {
 2554         if ($mem$$index < 8) {
 2555           emit_opcode(cbuf, Assembler::REX_WRB);
 2556         } else {
 2557           emit_opcode(cbuf, Assembler::REX_WRXB);
 2558         }
 2559       }
 2560     }
 2561   %}
 2562 
 2563   enc_class reg_mem(rRegI ereg, memory mem)
 2564   %{
          // Unpacks the encoding of $ereg and the base/index/scale/disp
          // components of $mem (plus the displacement's relocation type) and
          // forwards them unchanged to encode_RegMem.
 2565     // High registers are handled in encode_RegMem
 2566     int reg = $ereg$$reg;
 2567     int base = $mem$$base;
 2568     int index = $mem$$index;
 2569     int scale = $mem$$scale;
 2570     int disp = $mem$$disp;
 2571     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2572 
 2573     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
 2574   %}
 2575 
 2576   enc_class RM_opc_mem(immI rm_opcode, memory mem)
 2577   %{
          // Encodes the memory operand $mem via encode_RegMem, passing the
          // constant $rm_opcode as encode_RegMem's second argument (the
          // position reg_mem uses for a register encoding).
 2578     int rm_byte_opcode = $rm_opcode$$constant;
 2579 
 2580     // High registers are handled in encode_RegMem
 2581     int base = $mem$$base;
 2582     int index = $mem$$index;
 2583     int scale = $mem$$scale;
 2584     int displace = $mem$$disp;
 2585 
 2586     relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
 2587                                             // working with static
 2588                                             // globals
 2589     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
 2590                   disp_reloc);
 2591   %}
 2592 
 2593   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
 2594   %{
          // Calls encode_RegMem for the address [$src0 + $src1]: $src0 is the
          // base, the index and scale are fixed to their "none" values, the
          // constant $src1 is the displacement, and no relocation is attached.
 2595     int reg_encoding = $dst$$reg;
 2596     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2597     int index        = 0x04;            // 0x04 indicates no index
 2598     int scale        = 0x00;            // 0x00 indicates no scale
 2599     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2600     relocInfo::relocType disp_reloc = relocInfo::none;
 2601     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
 2602                   disp_reloc);
 2603   %}
 2604 
 2605   enc_class neg_reg(rRegI dst)
 2606   %{
          // Emits NEG $dst: an optional REX_B prefix, opcode 0xF7, then the
          // byte built by emit_rm(0x3, 0x03, dstenc).
 2607     int dstenc = $dst$$reg;
 2608     if (dstenc >= 8) {
 2609       emit_opcode(cbuf, Assembler::REX_B);
            // REX_B was emitted for the high register, so only the
            // encoding minus 8 is passed to emit_rm below.
 2610       dstenc -= 8;
 2611     }
 2612     // NEG $dst
 2613     emit_opcode(cbuf, 0xF7);
 2614     emit_rm(cbuf, 0x3, 0x03, dstenc);
 2615   %}
 2616 
 2617   enc_class neg_reg_wide(rRegI dst)
 2618   %{
          // Emits NEG $dst with a W-flavoured prefix: REX_W for encodings 0-7,
          // REX_WB (with the encoding reduced by 8) otherwise, followed by
          // opcode 0xF7 and the byte built by emit_rm(0x3, 0x03, dstenc).
 2619     int dstenc = $dst$$reg;
 2620     if (dstenc < 8) {
 2621       emit_opcode(cbuf, Assembler::REX_W);
 2622     } else {
 2623       emit_opcode(cbuf, Assembler::REX_WB);
 2624       dstenc -= 8;
 2625     }
 2626     // NEG $dst
 2627     emit_opcode(cbuf, 0xF7);
 2628     emit_rm(cbuf, 0x3, 0x03, dstenc);
 2629   %}
 2630 
 2631   enc_class setLT_reg(rRegI dst)
 2632   %{
          // Emits SETLT $dst as the bytes 0x0F 0x9C followed by
          // emit_rm(0x3, 0x0, dstenc).  Encodings >= 8 get REX_B (and are
          // reduced by 8); encodings 4-7 get a plain REX; 0-3 get no prefix.
          // NOTE(review): the plain REX for 4-7 presumably selects the
          // low-byte form of those registers -- confirm against the ISA manual.
 2633     int dstenc = $dst$$reg;
 2634     if (dstenc >= 8) {
 2635       emit_opcode(cbuf, Assembler::REX_B);
 2636       dstenc -= 8;
 2637     } else if (dstenc >= 4) {
 2638       emit_opcode(cbuf, Assembler::REX);
 2639     }
 2640     // SETLT $dst
 2641     emit_opcode(cbuf, 0x0F);
 2642     emit_opcode(cbuf, 0x9C);
 2643     emit_rm(cbuf, 0x3, 0x0, dstenc);
 2644   %}
 2645 
 2646   enc_class setNZ_reg(rRegI dst)
 2647   %{
          // Emits SETNZ $dst as the bytes 0x0F 0x95 followed by
          // emit_rm(0x3, 0x0, dstenc).  Encodings >= 8 get REX_B (and are
          // reduced by 8); encodings 4-7 get a plain REX; 0-3 get no prefix.
          // NOTE(review): the plain REX for 4-7 presumably selects the
          // low-byte form of those registers -- confirm against the ISA manual.
 2648     int dstenc = $dst$$reg;
 2649     if (dstenc >= 8) {
 2650       emit_opcode(cbuf, Assembler::REX_B);
 2651       dstenc -= 8;
 2652     } else if (dstenc >= 4) {
 2653       emit_opcode(cbuf, Assembler::REX);
 2654     }
 2655     // SETNZ $dst
 2656     emit_opcode(cbuf, 0x0F);
 2657     emit_opcode(cbuf, 0x95);
 2658     emit_rm(cbuf, 0x3, 0x0, dstenc);
 2659   %}
 2660 
 2661 
 2662   // Compare the longs and set -1, 0, or 1 into dst
 2663   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
 2664   %{
          // Emitted sequence: cmpq src1, src2; movl dst, -1; jl,s done;
          // setne dst; movzbl dst, dst.  When the jl is taken dst keeps -1;
          // otherwise setne/movzbl replace it with 0 or 1.
 2665     int src1enc = $src1$$reg;
 2666     int src2enc = $src2$$reg;
 2667     int dstenc = $dst$$reg;
 2668 
 2669     // cmpq $src1, $src2
          // The prefix always carries W; src1 >= 8 adds R, src2 >= 8 adds B.
 2670     if (src1enc < 8) {
 2671       if (src2enc < 8) {
 2672         emit_opcode(cbuf, Assembler::REX_W);
 2673       } else {
 2674         emit_opcode(cbuf, Assembler::REX_WB);
 2675       }
 2676     } else {
 2677       if (src2enc < 8) {
 2678         emit_opcode(cbuf, Assembler::REX_WR);
 2679       } else {
 2680         emit_opcode(cbuf, Assembler::REX_WRB);
 2681       }
 2682     }
 2683     emit_opcode(cbuf, 0x3B);
 2684     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
 2685 
 2686     // movl $dst, -1
 2687     if (dstenc >= 8) {
 2688       emit_opcode(cbuf, Assembler::REX_B);
 2689     }
 2690     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
 2691     emit_d32(cbuf, -1);
 2692 
 2693     // jl,s done
          // The skip distance spans the setne + movzbl pair below: 0x06 when
          // neither carries a prefix (dstenc < 4), 0x08 when both do
          // (assuming each emit_opcode/emit_rm call emits one byte).
 2694     emit_opcode(cbuf, 0x7C);
 2695     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
 2696 
 2697     // setne $dst
 2698     if (dstenc >= 4) {
 2699       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
 2700     }
 2701     emit_opcode(cbuf, 0x0F);
 2702     emit_opcode(cbuf, 0x95);
 2703     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
 2704 
 2705     // movzbl $dst, $dst
          // dst appears in both operand positions here, hence REX_RB (not
          // REX_B) for encodings >= 8.
 2706     if (dstenc >= 4) {
 2707       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
 2708     }
 2709     emit_opcode(cbuf, 0x0F);
 2710     emit_opcode(cbuf, 0xB6);
 2711     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
 2712   %}
 2713 
 2714   enc_class Push_ResultXD(regD dst) %{
          // Moves a double into $dst through the stack slot at [rsp]:
          // fstp_d writes the slot, movdbl loads it into the XMM register,
          // and addptr then adds 8 to rsp.
          // NOTE(review): this block only releases the 8-byte slot; it is
          // presumably reserved earlier (e.g. by Push_SrcXD) -- confirm at
          // the use sites.
 2715     MacroAssembler _masm(&cbuf);
 2716     __ fstp_d(Address(rsp, 0));
 2717     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2718     __ addptr(rsp, 8);
 2719   %}
 2720 
 2721   enc_class Push_SrcXD(regD src) %{
          // Subtracts 8 from rsp, stores $src into the new slot at [rsp] with
          // movdbl, then issues fld_d on that slot.  rsp is left lowered by 8
          // when this block ends.
 2722     MacroAssembler _masm(&cbuf);
 2723     __ subptr(rsp, 8);
 2724     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2725     __ fld_d(Address(rsp, 0));
 2726   %}
 2727 
 2728 
 2729   enc_class enc_rethrow()
 2730   %{
          // Emits opcode 0xE9 followed by a relocated 32-bit displacement to
          // OptoRuntime::rethrow_stub().  The displacement is the stub address
          // minus (cbuf.insts_end() + 4); insts_end() is read after the opcode
          // has been emitted, so the extra 4 accounts for the displacement
          // field itself.
 2731     cbuf.set_insts_mark();
 2732     emit_opcode(cbuf, 0xE9); // jmp entry
 2733     emit_d32_reloc(cbuf,
 2734                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
 2735                    runtime_call_Relocation::spec(),
 2736                    RELOC_DISP32);
 2737   %}
 2738 
 2739 %}
 2740 
 2741 
 2742 
 2743 //----------FRAME--------------------------------------------------------------
 2744 // Definition of frame structure and management information.
 2745 //
 2746 //  S T A C K   L A Y O U T    Allocators stack-slot number
 2747 //                             |   (to get allocators register number
 2748 //  G  Owned by    |        |  v    add OptoReg::stack0())
 2749 //  r   CALLER     |        |
 2750 //  o     |        +--------+      pad to even-align allocators stack-slot
 2751 //  w     V        |  pad0  |        numbers; owned by CALLER
 2752 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 2753 //  h     ^        |   in   |  5
 2754 //        |        |  args  |  4   Holes in incoming args owned by SELF
 2755 //  |     |        |        |  3
 2756 //  |     |        +--------+
 2757 //  V     |        | old out|      Empty on Intel, window on Sparc
 2758 //        |    old |preserve|      Must be even aligned.
 2759 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 2760 //        |        |   in   |  3   area for Intel ret address
 2761 //     Owned by    |preserve|      Empty on Sparc.
 2762 //       SELF      +--------+
 2763 //        |        |  pad2  |  2   pad to align old SP
 2764 //        |        +--------+  1
 2765 //        |        | locks  |  0
 2766 //        |        +--------+----> OptoReg::stack0(), even aligned
 2767 //        |        |  pad1  | 11   pad to align new SP
 2768 //        |        +--------+
 2769 //        |        |        | 10
 2770 //        |        | spills |  9   spills
 2771 //        V        |        |  8   (pad0 slot for callee)
 2772 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 2773 //        ^        |  out   |  7
 2774 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 2775 //     Owned by    +--------+
 2776 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 2777 //        |    new |preserve|      Must be even-aligned.
 2778 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 2779 //        |        |        |
 2780 //
 2781 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 2782 //         known from SELF's arguments and the Java calling convention.
 2783 //         Region 6-7 is determined per call site.
 2784 // Note 2: If the calling convention leaves holes in the incoming argument
 2785 //         area, those holes are owned by SELF.  Holes in the outgoing area
 2786 //         are owned by the CALLEE.  Holes should not be necessary in the
 2787 //         incoming area, as the Java calling convention is completely under
 2788 //         the control of the AD file.  Doubles can be sorted and packed to
 2789 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 2790 //         varargs C calling conventions.
 2791 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 2792 //         even aligned with pad0 as needed.
 2793 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 2794 //         region 6-11 is even aligned; it may be padded out more so that
 2795 //         the region from SP to FP meets the minimum stack alignment.
 2796 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 2797 //         alignment.  Region 11, pad1, may be dynamically extended so that
 2798 //         SP meets the minimum alignment.
 2799 
 2800 frame
 2801 %{
 2802   // This register defines part of the calling convention
 2803   // between compiled code and the interpreter.
 2804   inline_cache_reg(RAX);                // Inline Cache Register
 2805 
 2806   // Optional: name the operand used by cisc-spilling to access
 2807   // [stack_pointer + offset]
 2808   cisc_spilling_operand_name(indOffset32);
 2809 
 2810   // Number of stack slots consumed by locking an object
 2811   sync_stack_slots(2);
 2812 
 2813   // Compiled code's Frame Pointer
 2814   frame_pointer(RSP);
 2815 
 2816   // Interpreter stores its frame pointer in a register which is
 2817   // stored to the stack by I2CAdaptors.
 2818   // I2CAdaptors convert from interpreted java to compiled java.
 2819   interpreter_frame_pointer(RBP);
 2820 
 2821   // Stack alignment requirement
 2822   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 2823 
 2824   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 2825   // for calls to C.  Supports the var-args backing area for register parms.
 2826   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 2827 
 2828   // The after-PROLOG location of the return address.  Location of
 2829   // return address specifies a type (REG or STACK) and a number
 2830   // representing the register number (i.e. - use a register name) or
 2831   // stack slot.
 2832   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 2833   // Otherwise, it is above the locks and verification slot and alignment word
        // The slot is computed as: (in_preserve_stack_slots + fixed_slots)
        // rounded up to stack_alignment_in_slots(), minus 2.
 2834   return_addr(STACK - 2 +
 2835               align_up((Compile::current()->in_preserve_stack_slots() +
 2836                         Compile::current()->fixed_slots()),
 2837                        stack_alignment_in_slots()));
 2838 
 2839   // Location of compiled Java return values.  Same as C for now.
 2840   return_value
 2841   %{
 2842     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 2843            "only return normal values");
 2844 
          // lo[] and hi[] are indexed by ideal_reg and give the two halves of
          // the returned OptoRegPair.  Per the entry labels, slots 0 and 1 sit
          // below Op_RegN and are zero-filled.  OptoReg::Bad in hi[] marks a
          // type with no high half.
 2845     static const int lo[Op_RegL + 1] = {
 2846       0,
 2847       0,
 2848       RAX_num,  // Op_RegN
 2849       RAX_num,  // Op_RegI
 2850       RAX_num,  // Op_RegP
 2851       XMM0_num, // Op_RegF
 2852       XMM0_num, // Op_RegD
 2853       RAX_num   // Op_RegL
 2854     };
 2855     static const int hi[Op_RegL + 1] = {
 2856       0,
 2857       0,
 2858       OptoReg::Bad, // Op_RegN
 2859       OptoReg::Bad, // Op_RegI
 2860       RAX_H_num,    // Op_RegP
 2861       OptoReg::Bad, // Op_RegF
 2862       XMM0b_num,    // Op_RegD
 2863       RAX_H_num     // Op_RegL
 2864     };
 2865     // Excluded flags and vector registers.
 2866     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 2867     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 2868   %}
 2869 %}
 2870 
 2871 //----------ATTRIBUTES---------------------------------------------------------
 2872 //----------Operand Attributes-------------------------------------------------
 2873 op_attrib op_cost(0);        // Required cost attribute
      // NOTE(review): the parenthesized value in each attribute declaration is
      // presumably the default used when a definition does not set the
      // attribute itself -- confirm against the ADLC documentation.
 2874 
 2875 //----------Instruction Attributes---------------------------------------------
 2876 ins_attrib ins_cost(100);       // Required cost attribute
 2877 ins_attrib ins_size(8);         // Required size attribute (in bits)
 2878 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 2879                                 // a non-matching short branch variant
 2880                                 // of some long branch?
 2881 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 2882                                 // be a power of 2) specifies the
 2883                                 // alignment that some part of the
 2884                                 // instruction (not necessarily the
 2885                                 // start) requires.  If > 1, a
 2886                                 // compute_padding() function must be
 2887                                 // provided for the instruction
 2888 
 2889 //----------OPERANDS-----------------------------------------------------------
 2890 // Operand definitions must precede instruction definitions for correct parsing
 2891 // in the ADLC because operands constitute user defined types which are used in
 2892 // instruction definitions.
 2893 
 2894 //----------Simple Operands----------------------------------------------------
 2895 // Immediate Operands
 2896 // Integer Immediate: matches any ConI, with no value restriction
 2897 operand immI()
 2898 %{
 2899   match(ConI);
 2900 
 2901   op_cost(10);
 2902   format %{ %}
 2903   interface(CONST_INTER);
 2904 %}
 2905 
 2906 // Constant for test vs zero: matches only the int constant 0
 2907 operand immI_0()
 2908 %{
 2909   predicate(n->get_int() == 0);
 2910   match(ConI);
 2911 
 2912   op_cost(0);
 2913   format %{ %}
 2914   interface(CONST_INTER);
 2915 %}
 2916 
 2917 // Constant for increment: matches only the int constant 1
 2918 operand immI_1()
 2919 %{
 2920   predicate(n->get_int() == 1);
 2921   match(ConI);
 2922 
 2923   op_cost(0);
 2924   format %{ %}
 2925   interface(CONST_INTER);
 2926 %}
 2927 
 2928 // Constant for decrement: matches only the int constant -1
 2929 operand immI_M1()
 2930 %{
 2931   predicate(n->get_int() == -1);
 2932   match(ConI);
 2933 
 2934   op_cost(0);
 2935   format %{ %}
 2936   interface(CONST_INTER);
 2937 %}
 2938 
 2939 operand immI_2()
 2940 %{
        // Matches only the int constant 2.
 2941   predicate(n->get_int() == 2);
 2942   match(ConI);
 2943 
 2944   op_cost(0);
 2945   format %{ %}
 2946   interface(CONST_INTER);
 2947 %}
 2948 
 2949 operand immI_4()
 2950 %{
        // Matches only the int constant 4.
 2951   predicate(n->get_int() == 4);
 2952   match(ConI);
 2953 
 2954   op_cost(0);
 2955   format %{ %}
 2956   interface(CONST_INTER);
 2957 %}
 2958 
 2959 operand immI_8()
 2960 %{
        // Matches only the int constant 8.
 2961   predicate(n->get_int() == 8);
 2962   match(ConI);
 2963 
 2964   op_cost(0);
 2965   format %{ %}
 2966   interface(CONST_INTER);
 2967 %}
 2968 
 2969 // Valid scale values for addressing modes: int constants 0 through 3
 2970 operand immI2()
 2971 %{
 2972   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 2973   match(ConI);
 2974 
 2975   format %{ %}
 2976   interface(CONST_INTER);
 2977 %}
 2978 
 2979 operand immU7()
 2980 %{
        // Matches int constants in the range 0 .. 0x7F (inclusive).
 2981   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 2982   match(ConI);
 2983 
 2984   op_cost(5);
 2985   format %{ %}
 2986   interface(CONST_INTER);
 2987 %}
 2988 
 2989 operand immI8()
 2990 %{
        // Matches int constants in the signed 8-bit range -0x80 .. 0x7F.
 2991   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 2992   match(ConI);
 2993 
 2994   op_cost(5);
 2995   format %{ %}
 2996   interface(CONST_INTER);
 2997 %}
 2998 
 2999 operand immU8()
 3000 %{
        // Matches int constants in the unsigned 8-bit range 0 .. 255.
 3001   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3002   match(ConI);
 3003 
 3004   op_cost(5);
 3005   format %{ %}
 3006   interface(CONST_INTER);
 3007 %}
 3008 
 3009 operand immI16()
 3010 %{
        // Matches int constants in the signed 16-bit range -32768 .. 32767.
 3011   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3012   match(ConI);
 3013 
 3014   op_cost(10);
 3015   format %{ %}
 3016   interface(CONST_INTER);
 3017 %}
 3018 
 3019 // Int Immediate non-negative: matches any int constant >= 0
 3020 operand immU31()
 3021 %{
 3022   predicate(n->get_int() >= 0);
 3023   match(ConI);
 3024 
 3025   op_cost(0);
 3026   format %{ %}
 3027   interface(CONST_INTER);
 3028 %}
 3029 
 3030 // Constant for long shifts: matches only the int constant 32
 3031 operand immI_32()
 3032 %{
 3033   predicate( n->get_int() == 32 );
 3034   match(ConI);
 3035 
 3036   op_cost(0);
 3037   format %{ %}
 3038   interface(CONST_INTER);
 3039 %}
 3040 
 3041 // Constant for long shifts: matches only the int constant 64
 3042 operand immI_64()
 3043 %{
 3044   predicate( n->get_int() == 64 );
 3045   match(ConI);
 3046 
 3047   op_cost(0);
 3048   format %{ %}
 3049   interface(CONST_INTER);
 3050 %}
 3051 
 3052 // Pointer Immediate: matches any ConP, with no value restriction
 3053 operand immP()
 3054 %{
 3055   match(ConP);
 3056 
 3057   op_cost(10);
 3058   format %{ %}
 3059   interface(CONST_INTER);
 3060 %}
 3061 
 3062 // nullptr Pointer Immediate: matches a ConP whose pointer value is 0
 3063 operand immP0()
 3064 %{
 3065   predicate(n->get_ptr() == 0);
 3066   match(ConP);
 3067 
 3068   op_cost(5);
 3069   format %{ %}
 3070   interface(CONST_INTER);
 3071 %}
 3072 
 3073 // Narrow Pointer Immediate: matches any ConN, with no value restriction
 3074 operand immN() %{
 3075   match(ConN);
 3076 
 3077   op_cost(10);
 3078   format %{ %}
 3079   interface(CONST_INTER);
 3080 %}
 3081 
 3082 operand immNKlass() %{
        // Matches any ConNKlass node, with no value restriction.
 3083   match(ConNKlass);
 3084 
 3085   op_cost(10);
 3086   format %{ %}
 3087   interface(CONST_INTER);
 3088 %}
 3089 
 3090 // nullptr Narrow Pointer Immediate: matches a ConN whose narrow value is 0
 3091 operand immN0() %{
 3092   predicate(n->get_narrowcon() == 0);
 3093   match(ConN);
 3094 
 3095   op_cost(5);
 3096   format %{ %}
 3097   interface(CONST_INTER);
 3098 %}
 3099 
 3100 operand immP31()
 3101 %{
        // Matches a ConP that needs no relocation and whose pointer value
        // becomes 0 when shifted right by 31 bits.
 3102   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 3103             && (n->get_ptr() >> 31) == 0);
 3104   match(ConP);
 3105 
 3106   op_cost(5);
 3107   format %{ %}
 3108   interface(CONST_INTER);
 3109 %}
 3110 
 3111 
 3112 // Long Immediate: matches any ConL, with no value restriction
 3113 operand immL()
 3114 %{
 3115   match(ConL);
 3116 
 3117   op_cost(20);
 3118   format %{ %}
 3119   interface(CONST_INTER);
 3120 %}
 3121 
 3122 // Long Immediate 8-bit: long constants in the signed range -0x80 .. 0x7F
 3123 operand immL8()
 3124 %{
 3125   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 3126   match(ConL);
 3127 
 3128   op_cost(5);
 3129   format %{ %}
 3130   interface(CONST_INTER);
 3131 %}
 3132 
 3133 // Long Immediate 32-bit unsigned: value survives a round trip through unsigned int
 3134 operand immUL32()
 3135 %{
 3136   predicate(n->get_long() == (unsigned int) (n->get_long()));
 3137   match(ConL);
 3138 
 3139   op_cost(10);
 3140   format %{ %}
 3141   interface(CONST_INTER);
 3142 %}
 3143 
 3144 // Long Immediate 32-bit signed: value survives a round trip through int
 3145 operand immL32()
 3146 %{
 3147   predicate(n->get_long() == (int) (n->get_long()));
 3148   match(ConL);
 3149 
 3150   op_cost(15);
 3151   format %{ %}
 3152   interface(CONST_INTER);
 3153 %}
 3154 
 3155 operand immL_Pow2()
 3156 %{
        // Matches a long constant whose value, cast to julong, satisfies
        // is_power_of_2.
 3157   predicate(is_power_of_2((julong)n->get_long()));
 3158   match(ConL);
 3159 
 3160   op_cost(15);
 3161   format %{ %}
 3162   interface(CONST_INTER);
 3163 %}
 3164 
 3165 operand immL_NotPow2()
 3166 %{
        // Matches a long constant whose bitwise complement, cast to julong,
        // satisfies is_power_of_2.
 3167   predicate(is_power_of_2((julong)~n->get_long()));
 3168   match(ConL);
 3169 
 3170   op_cost(15);
 3171   format %{ %}
 3172   interface(CONST_INTER);
 3173 %}
 3174 
 3175 // Long Immediate zero: matches only the long constant 0
 3176 operand immL0()
 3177 %{
 3178   predicate(n->get_long() == 0L);
 3179   match(ConL);
 3180 
 3181   op_cost(10);
 3182   format %{ %}
 3183   interface(CONST_INTER);
 3184 %}
 3185 
 3186 // Constant for increment: matches only the long constant 1
 3187 operand immL1()
 3188 %{
 3189   predicate(n->get_long() == 1);
 3190   match(ConL);
 3191 
 3192   format %{ %}
 3193   interface(CONST_INTER);
 3194 %}
 3195 
 3196 // Constant for decrement: matches only the long constant -1
 3197 operand immL_M1()
 3198 %{
 3199   predicate(n->get_long() == -1);
 3200   match(ConL);
 3201 
 3202   format %{ %}
 3203   interface(CONST_INTER);
 3204 %}
 3205 
 3206 // Long Immediate: matches only the long constant 10
 3207 operand immL10()
 3208 %{
 3209   predicate(n->get_long() == 10);
 3210   match(ConL);
 3211 
 3212   format %{ %}
 3213   interface(CONST_INTER);
 3214 %}
 3215 
 3216 // Long immediate from 0 to 127 (inclusive).
 3217 // Used for a shorter form of long mul by 10.
 3218 operand immL_127()
 3219 %{
 3220   predicate(0 <= n->get_long() && n->get_long() < 0x80);
 3221   match(ConL);
 3222 
 3223   op_cost(10);
 3224   format %{ %}
 3225   interface(CONST_INTER);
 3226 %}
 3227 
 3228 // Long Immediate: low 32-bit mask, i.e. only the constant 0xFFFFFFFF
 3229 operand immL_32bits()
 3230 %{
 3231   predicate(n->get_long() == 0xFFFFFFFFL);
 3232   match(ConL);
 3233   op_cost(20);
 3234 
 3235   format %{ %}
 3236   interface(CONST_INTER);
 3237 %}
 3238 
 3239 // Int Immediate: 2^n-1, positive (value > 0 and value + 1 is a power of 2)
 3240 operand immI_Pow2M1()
 3241 %{
 3242   predicate((n->get_int() > 0)
 3243             && is_power_of_2((juint)n->get_int() + 1));
 3244   match(ConI);
 3245 
 3246   op_cost(20);
 3247   format %{ %}
 3248   interface(CONST_INTER);
 3249 %}
 3250 
 3251 // Float Immediate zero: matches a ConF for which jint_cast of the value is 0
 3252 operand immF0()
 3253 %{
        // NOTE(review): jint_cast is presumably a bitwise reinterpretation, in
        // which case only +0.0f (not -0.0f) matches -- confirm.
 3254   predicate(jint_cast(n->getf()) == 0);
 3255   match(ConF);
 3256 
 3257   op_cost(5);
 3258   format %{ %}
 3259   interface(CONST_INTER);
 3260 %}
 3261 
 3262 // Float Immediate: matches any ConF, with no value restriction
 3263 operand immF()
 3264 %{
 3265   match(ConF);
 3266 
 3267   op_cost(15);
 3268   format %{ %}
 3269   interface(CONST_INTER);
 3270 %}
 3271 
 3272 // Double Immediate zero: matches a ConD for which jlong_cast of the value is 0
 3273 operand immD0()
 3274 %{
        // NOTE(review): jlong_cast is presumably a bitwise reinterpretation, in
        // which case only +0.0 (not -0.0) matches -- confirm.
 3275   predicate(jlong_cast(n->getd()) == 0);
 3276   match(ConD);
 3277 
 3278   op_cost(5);
 3279   format %{ %}
 3280   interface(CONST_INTER);
 3281 %}
 3282 
 3283 // Double Immediate: matches any ConD, with no value restriction
 3284 operand immD()
 3285 %{
 3286   match(ConD);
 3287 
 3288   op_cost(15);
 3289   format %{ %}
 3290   interface(CONST_INTER);
 3291 %}
 3292 
 3293 // Immediates for special shifts (sign extend)
 3294 
 3295 // Shift-count constant: matches only the int constant 16
 3296 operand immI_16()
 3297 %{
 3298   predicate(n->get_int() == 16);
 3299   match(ConI);
 3300 
 3301   format %{ %}
 3302   interface(CONST_INTER);
 3303 %}
 3304 
 3305 operand immI_24()
 3306 %{
        // Matches only the int constant 24.
 3307   predicate(n->get_int() == 24);
 3308   match(ConI);
 3309 
 3310   format %{ %}
 3311   interface(CONST_INTER);
 3312 %}
 3313 
 3314 // Constant for byte-wide masking: matches only the int constant 255
 3315 operand immI_255()
 3316 %{
 3317   predicate(n->get_int() == 255);
 3318   match(ConI);
 3319 
 3320   format %{ %}
 3321   interface(CONST_INTER);
 3322 %}
 3323 
 3324 // Constant for short-wide masking: matches only the int constant 65535
 3325 operand immI_65535()
 3326 %{
 3327   predicate(n->get_int() == 65535);
 3328   match(ConI);
 3329 
 3330   format %{ %}
 3331   interface(CONST_INTER);
 3332 %}
 3333 
 3334 // Constant for byte-wide masking: matches only the long constant 255
 3335 operand immL_255()
 3336 %{
 3337   predicate(n->get_long() == 255);
 3338   match(ConL);
 3339 
 3340   format %{ %}
 3341   interface(CONST_INTER);
 3342 %}
 3343 
 3344 // Constant for short-wide masking: matches only the long constant 65535
 3345 operand immL_65535()
 3346 %{
 3347   predicate(n->get_long() == 65535);
 3348   match(ConL);
 3349 
 3350   format %{ %}
 3351   interface(CONST_INTER);
 3352 %}
 3353 
 3354 operand kReg()
 3355 %{
        // RegVectMask operand allocated from register class vectmask_reg.
 3356   constraint(ALLOC_IN_RC(vectmask_reg));
 3357   match(RegVectMask);
 3358   format %{%}
 3359   interface(REG_INTER);
 3360 %}
 3361 
 3362 operand kReg_K1()
 3363 %{
        // RegVectMask operand allocated from register class vectmask_reg_K1.
 3364   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3365   match(RegVectMask);
 3366   format %{%}
 3367   interface(REG_INTER);
 3368 %}
 3369 
 3370 operand kReg_K2()
 3371 %{
        // RegVectMask operand allocated from register class vectmask_reg_K2.
 3372   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3373   match(RegVectMask);
 3374   format %{%}
 3375   interface(REG_INTER);
 3376 %}
 3377 
 3378 // Special Registers: RegVectMask operand allocated from register class vectmask_reg_K3
 3379 operand kReg_K3()
 3380 %{
 3381   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3382   match(RegVectMask);
 3383   format %{%}
 3384   interface(REG_INTER);
 3385 %}
 3386 
 3387 operand kReg_K4()
 3388 %{
        // RegVectMask operand allocated from register class vectmask_reg_K4.
 3389   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3390   match(RegVectMask);
 3391   format %{%}
 3392   interface(REG_INTER);
 3393 %}
 3394 
 3395 operand kReg_K5()
 3396 %{
        // RegVectMask operand allocated from register class vectmask_reg_K5.
 3397   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3398   match(RegVectMask);
 3399   format %{%}
 3400   interface(REG_INTER);
 3401 %}
 3402 
 3403 operand kReg_K6()
 3404 %{
        // RegVectMask operand allocated from register class vectmask_reg_K6.
 3405   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3406   match(RegVectMask);
 3407   format %{%}
 3408   interface(REG_INTER);
 3409 %}
 3410 
 3411 // Special Registers: RegVectMask operand allocated from register class vectmask_reg_K7
 3412 operand kReg_K7()
 3413 %{
 3414   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3415   match(RegVectMask);
 3416   format %{%}
 3417   interface(REG_INTER);
 3418 %}
 3419 
 3420 // Register Operands
 3421 // Integer Register: RegI operand allocated from register class int_reg
 3422 operand rRegI()
 3423 %{
 3424   constraint(ALLOC_IN_RC(int_reg));
 3425   match(RegI);
 3426 
        // The single-register int operands are also listed as matches.
 3427   match(rax_RegI);
 3428   match(rbx_RegI);
 3429   match(rcx_RegI);
 3430   match(rdx_RegI);
 3431   match(rdi_RegI);
 3432 
 3433   format %{ %}
 3434   interface(REG_INTER);
 3435 %}
 3436 
 3437 // Special Registers: RegI operand allocated from register class int_rax_reg
 3438 operand rax_RegI()
 3439 %{
 3440   constraint(ALLOC_IN_RC(int_rax_reg));
 3441   match(RegI);
 3442   match(rRegI);
 3443 
 3444   format %{ "RAX" %}
 3445   interface(REG_INTER);
 3446 %}
 3447 
 3448 // Special Registers: RegI operand allocated from register class int_rbx_reg
 3449 operand rbx_RegI()
 3450 %{
 3451   constraint(ALLOC_IN_RC(int_rbx_reg));
 3452   match(RegI);
 3453   match(rRegI);
 3454 
 3455   format %{ "RBX" %}
 3456   interface(REG_INTER);
 3457 %}
 3458 
 3459 operand rcx_RegI()
 3460 %{
        // RegI operand allocated from register class int_rcx_reg.
 3461   constraint(ALLOC_IN_RC(int_rcx_reg));
 3462   match(RegI);
 3463   match(rRegI);
 3464 
 3465   format %{ "RCX" %}
 3466   interface(REG_INTER);
 3467 %}
 3468 
 3469 operand rdx_RegI()
 3470 %{
        // RegI operand allocated from register class int_rdx_reg.
 3471   constraint(ALLOC_IN_RC(int_rdx_reg));
 3472   match(RegI);
 3473   match(rRegI);
 3474 
 3475   format %{ "RDX" %}
 3476   interface(REG_INTER);
 3477 %}
 3478 
 3479 operand rdi_RegI()
 3480 %{
        // RegI operand allocated from register class int_rdi_reg.
 3481   constraint(ALLOC_IN_RC(int_rdi_reg));
 3482   match(RegI);
 3483   match(rRegI);
 3484 
 3485   format %{ "RDI" %}
 3486   interface(REG_INTER);
 3487 %}
 3488 
 3489 operand no_rax_rdx_RegI()
 3490 %{
        // RegI operand allocated from register class int_no_rax_rdx_reg.
        // Of the single-register int operands, only rbx/rcx/rdi are listed
        // as matches (rax and rdx are left out).
 3491   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 3492   match(RegI);
 3493   match(rbx_RegI);
 3494   match(rcx_RegI);
 3495   match(rdi_RegI);
 3496 
 3497   format %{ %}
 3498   interface(REG_INTER);
 3499 %}
 3500 
 3501 operand no_rbp_r13_RegI()
 3502 %{
        // RegI operand allocated from register class int_no_rbp_r13_reg.
 3503   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 3504   match(RegI);
 3505   match(rRegI);
 3506   match(rax_RegI);
 3507   match(rbx_RegI);
 3508   match(rcx_RegI);
 3509   match(rdx_RegI);
 3510   match(rdi_RegI);
 3511 
 3512   format %{ %}
 3513   interface(REG_INTER);
 3514 %}
 3515 
 3516 // Pointer Register: RegP operand allocated from register class any_reg
 3517 operand any_RegP()
 3518 %{
 3519   constraint(ALLOC_IN_RC(any_reg));
 3520   match(RegP);
 3521   match(rax_RegP);
 3522   match(rbx_RegP);
 3523   match(rdi_RegP);
 3524   match(rsi_RegP);
 3525   match(rbp_RegP);
 3526   match(r15_RegP);
 3527   match(rRegP);
 3528 
 3529   format %{ %}
 3530   interface(REG_INTER);
 3531 %}
 3532 
 3533 operand rRegP()
 3534 %{
        // General pointer operand: RegP allocated from register class ptr_reg.
 3535   constraint(ALLOC_IN_RC(ptr_reg));
 3536   match(RegP);
 3537   match(rax_RegP);
 3538   match(rbx_RegP);
 3539   match(rdi_RegP);
 3540   match(rsi_RegP);
 3541   match(rbp_RegP);  // See Q&A below about
 3542   match(r15_RegP);  // r15_RegP and rbp_RegP.
 3543 
 3544   format %{ %}
 3545   interface(REG_INTER);
 3546 %}
 3547 
 3548 operand rRegN() %{
        // RegN operand; it is allocated from int_reg, the same register class
        // that rRegI uses.
 3549   constraint(ALLOC_IN_RC(int_reg));
 3550   match(RegN);
 3551 
 3552   format %{ %}
 3553   interface(REG_INTER);
 3554 %}
 3555 
 3556 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 3557 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 3558 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 3559 // The output of an instruction is controlled by the allocator, which respects
 3560 // register class masks, not match rules.  Unless an instruction mentions
 3561 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 3562 // by the allocator as an input.
 3563 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 3564 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 3565 // result, RBP is not included in the output of the instruction either.
 3566 
 3567 operand no_rax_RegP()
 3568 %{
        // RegP operand allocated from register class ptr_no_rax_reg.
 3569   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
 3570   match(RegP);
 3571   match(rbx_RegP);
 3572   match(rsi_RegP);
 3573   match(rdi_RegP);
 3574 
 3575   format %{ %}
 3576   interface(REG_INTER);
 3577 %}
 3578 
 3579 // This operand is not allowed to use RBP even if
 3580 // RBP is not used to hold the frame pointer.
 3581 operand no_rbp_RegP()
 3582 %{
        // RegP operand allocated from register class ptr_reg_no_rbp.
 3583   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 3584   match(RegP);
 3585   match(rbx_RegP);
 3586   match(rsi_RegP);
 3587   match(rdi_RegP);
 3588 
 3589   format %{ %}
 3590   interface(REG_INTER);
 3591 %}
 3592 
 3593 operand no_rax_rbx_RegP()
 3594 %{
        // RegP operand allocated from register class ptr_no_rax_rbx_reg.
 3595   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
 3596   match(RegP);
 3597   match(rsi_RegP);
 3598   match(rdi_RegP);
 3599 
 3600   format %{ %}
 3601   interface(REG_INTER);
 3602 %}
 3603 
 3604 // Special Registers
 3605 // Return a pointer value: RegP operand allocated from register class ptr_rax_reg
 3606 operand rax_RegP()
 3607 %{
 3608   constraint(ALLOC_IN_RC(ptr_rax_reg));
 3609   match(RegP);
 3610   match(rRegP);
 3611 
 3612   format %{ %}
 3613   interface(REG_INTER);
 3614 %}
 3615 
 3616 // Special Registers
 3617 // Return a compressed pointer value: RegN operand allocated from register class int_rax_reg
 3618 operand rax_RegN()
 3619 %{
 3620   constraint(ALLOC_IN_RC(int_rax_reg));
 3621   match(RegN);
 3622   match(rRegN);
 3623 
 3624   format %{ %}
 3625   interface(REG_INTER);
 3626 %}
 3627 
 3628 // Used in AtomicAdd: RegP operand allocated from register class ptr_rbx_reg
 3629 operand rbx_RegP()
 3630 %{
 3631   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 3632   match(RegP);
 3633   match(rRegP);
 3634 
 3635   format %{ %}
 3636   interface(REG_INTER);
 3637 %}
 3638 
 3639 operand rsi_RegP()
 3640 %{
        // RegP operand allocated from register class ptr_rsi_reg.
 3641   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 3642   match(RegP);
 3643   match(rRegP);
 3644 
 3645   format %{ %}
 3646   interface(REG_INTER);
 3647 %}
 3648 
 3649 operand rbp_RegP()
 3650 %{
        // RegP operand allocated from register class ptr_rbp_reg.
 3651   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 3652   match(RegP);
 3653   match(rRegP);
 3654 
 3655   format %{ %}
 3656   interface(REG_INTER);
 3657 %}
 3658 
 3659 // Used in rep stosq: RegP operand allocated from register class ptr_rdi_reg
 3660 operand rdi_RegP()
 3661 %{
 3662   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 3663   match(RegP);
 3664   match(rRegP);
 3665 
 3666   format %{ %}
 3667   interface(REG_INTER);
 3668 %}
 3669 
 3670 operand r15_RegP()
 3671 %{
        // Pointer operand constrained to the ptr_r15_reg register class;
        // matches RegP and the general rRegP operand.
 3672   constraint(ALLOC_IN_RC(ptr_r15_reg));
 3673   match(RegP);
 3674   match(rRegP);
 3675
 3676   format %{ %}
 3677   interface(REG_INTER);
 3678 %}
 3679 
 3680 operand rRegL()
 3681 %{
        // General long register operand, constrained to the long_reg register
        // class. Besides RegL it also matches the rax_RegL and rdx_RegL
        // operands defined below.
 3682   constraint(ALLOC_IN_RC(long_reg));
 3683   match(RegL);
 3684   match(rax_RegL);
 3685   match(rdx_RegL);
 3686
 3687   format %{ %}
 3688   interface(REG_INTER);
 3689 %}
 3690 
 3691 // Special Registers
      // no_rax_rdx_RegL: long operand constrained to the long_no_rax_rdx_reg
      // register class; matches RegL and the general rRegL operand.
 3692 operand no_rax_rdx_RegL()
 3693 %{
 3694   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 3695   match(RegL);
 3696   match(rRegL);
 3697
 3698   format %{ %}
 3699   interface(REG_INTER);
 3700 %}
 3701 
 3702 operand rax_RegL()
 3703 %{
        // Long operand constrained to the long_rax_reg register class;
        // matches RegL and the general rRegL operand.
 3704   constraint(ALLOC_IN_RC(long_rax_reg));
 3705   match(RegL);
 3706   match(rRegL);
 3707
        // Unlike its rcx/rdx siblings, this operand has a non-empty format
        // string ("RAX").
 3708   format %{ "RAX" %}
 3709   interface(REG_INTER);
 3710 %}
 3711 
 3712 operand rcx_RegL()
 3713 %{
        // Long operand constrained to the long_rcx_reg register class;
        // matches RegL and the general rRegL operand.
 3714   constraint(ALLOC_IN_RC(long_rcx_reg));
 3715   match(RegL);
 3716   match(rRegL);
 3717
 3718   format %{ %}
 3719   interface(REG_INTER);
 3720 %}
 3721 
 3722 operand rdx_RegL()
 3723 %{
        // Long operand constrained to the long_rdx_reg register class;
        // matches RegL and the general rRegL operand.
 3724   constraint(ALLOC_IN_RC(long_rdx_reg));
 3725   match(RegL);
 3726   match(rRegL);
 3727
 3728   format %{ %}
 3729   interface(REG_INTER);
 3730 %}
 3731 
 3732 operand no_rbp_r13_RegL()
 3733 %{
        // Long operand constrained to the long_no_rbp_r13_reg register class.
        // Besides RegL it matches the general rRegL operand and the
        // rax_RegL, rcx_RegL and rdx_RegL operands defined above.
 3734   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 3735   match(RegL);
 3736   match(rRegL);
 3737   match(rax_RegL);
 3738   match(rcx_RegL);
 3739   match(rdx_RegL);
 3740
 3741   format %{ %}
 3742   interface(REG_INTER);
 3743 %}
 3744 
 3745 // Flags register operand, used as the output of compare instructions.
      // Constrained to the int_flags register class; printed as "RFLAGS".
 3746 operand rFlagsReg()
 3747 %{
 3748   constraint(ALLOC_IN_RC(int_flags));
 3749   match(RegFlags);
 3750
 3751   format %{ "RFLAGS" %}
 3752   interface(REG_INTER);
 3753 %}
 3754 
 3755 // Flags register, used as output of FLOATING POINT compare instructions
      // Same register class (int_flags) and match rule (RegFlags) as rFlagsReg;
      // it is a distinct operand type that prints as "RFLAGS_U".
      // NOTE(review): the "_U" suffix suggests unsigned compares also produce
      // this operand -- confirm against the instructions that use it.
 3756 operand rFlagsRegU()
 3757 %{
 3758   constraint(ALLOC_IN_RC(int_flags));
 3759   match(RegFlags);
 3760
 3761   format %{ "RFLAGS_U" %}
 3762   interface(REG_INTER);
 3763 %}
 3764 
 3765 operand rFlagsRegUCF() %{
        // Flags operand in the int_flags register class, printed as
        // "RFLAGS_U_CF".
 3766   constraint(ALLOC_IN_RC(int_flags));
 3767   match(RegFlags);
        // The predicate is the constant false.
        // NOTE(review): presumably this keeps the matcher from ever selecting
        // this operand for a bare RegFlags, so it only appears where an
        // instruction names it explicitly -- confirm.
 3768   predicate(false);
 3769
 3770   format %{ "RFLAGS_U_CF" %}
 3771   interface(REG_INTER);
 3772 %}
 3773 
 3774 // Float register operand: matches RegF, constrained to the float_reg
      // register class.
 3775 operand regF() %{
 3776    constraint(ALLOC_IN_RC(float_reg));
 3777    match(RegF);
 3778
 3779    format %{ %}
 3780    interface(REG_INTER);
 3781 %}
 3782 
 3783 // Float register operand: matches RegF, constrained to the
      // float_reg_legacy register class.
 3784 operand legRegF() %{
 3785    constraint(ALLOC_IN_RC(float_reg_legacy));
 3786    match(RegF);
 3787
 3788    format %{ %}
 3789    interface(REG_INTER);
 3790 %}
 3791 
 3792 // Float register operand: matches RegF, constrained to the float_reg_vl
      // register class.
 3793 operand vlRegF() %{
 3794    constraint(ALLOC_IN_RC(float_reg_vl));
 3795    match(RegF);
 3796
 3797    format %{ %}
 3798    interface(REG_INTER);
 3799 %}
 3800 
 3801 // Double register operand: matches RegD, constrained to the double_reg
      // register class.
 3802 operand regD() %{
 3803    constraint(ALLOC_IN_RC(double_reg));
 3804    match(RegD);
 3805
 3806    format %{ %}
 3807    interface(REG_INTER);
 3808 %}
 3809 
 3810 // Double register operand: matches RegD, constrained to the
      // double_reg_legacy register class.
 3811 operand legRegD() %{
 3812    constraint(ALLOC_IN_RC(double_reg_legacy));
 3813    match(RegD);
 3814
 3815    format %{ %}
 3816    interface(REG_INTER);
 3817 %}
 3818 
 3819 // Double register operand: matches RegD, constrained to the double_reg_vl
      // register class.
 3820 operand vlRegD() %{
 3821    constraint(ALLOC_IN_RC(double_reg_vl));
 3822    match(RegD);
 3823
 3824    format %{ %}
 3825    interface(REG_INTER);
 3826 %}
 3827 
 3828 //----------Memory Operands----------------------------------------------------
 3829 // Direct Memory Operand
 3830 // operand direct(immP addr)
 3831 // %{
 3832 //   match(addr);
 3833 
 3834 //   format %{ "[$addr]" %}
 3835 //   interface(MEMORY_INTER) %{
 3836 //     base(0xFFFFFFFF);
 3837 //     index(0x4);
 3838 //     scale(0x0);
 3839 //     disp($addr);
 3840 //   %}
 3841 // %}
 3842 
 3843 // Indirect Memory Operand: the address is the pointer register itself,
      // printed as "[$reg]".
 3844 operand indirect(any_RegP reg)
 3845 %{
 3846   constraint(ALLOC_IN_RC(ptr_reg));
 3847   match(reg);
 3848
 3849   format %{ "[$reg]" %}
 3850   interface(MEMORY_INTER) %{
 3851     base($reg);
          // 0x4 / 0x0 are the values the stack-slot operands in this file
          // annotate as "No Index" / "No Scale".
 3852     index(0x4);
 3853     scale(0x0);
 3854     disp(0x0);
 3855   %}
 3856 %}
 3857 
 3858 // Indirect Memory Plus Short Offset Operand: matches (AddP reg off) where
      // off is an immL8 immediate; base is $reg, displacement is $off.
 3859 operand indOffset8(any_RegP reg, immL8 off)
 3860 %{
 3861   constraint(ALLOC_IN_RC(ptr_reg));
 3862   match(AddP reg off);
 3863
 3864   format %{ "[$reg + $off (8-bit)]" %}
 3865   interface(MEMORY_INTER) %{
 3866     base($reg);
          // 0x4 / 0x0 are the values the stack-slot operands in this file
          // annotate as "No Index" / "No Scale".
 3867     index(0x4);
 3868     scale(0x0);
 3869     disp($off);
 3870   %}
 3871 %}
 3872 
 3873 // Indirect Memory Plus Long Offset Operand: matches (AddP reg off) where
      // off is an immL32 immediate; base is $reg, displacement is $off.
 3874 operand indOffset32(any_RegP reg, immL32 off)
 3875 %{
 3876   constraint(ALLOC_IN_RC(ptr_reg));
 3877   match(AddP reg off);
 3878
 3879   format %{ "[$reg + $off (32-bit)]" %}
 3880   interface(MEMORY_INTER) %{
 3881     base($reg);
          // 0x4 / 0x0 are the values the stack-slot operands in this file
          // annotate as "No Index" / "No Scale".
 3882     index(0x4);
 3883     scale(0x0);
 3884     disp($off);
 3885   %}
 3886 %}
 3887 
 3888 // Indirect Memory Plus Index Register Plus Offset Operand: matches
      // (AddP (AddP reg lreg) off) with a long index register and an immL32
      // offset; the index is used with scale 0x0.
 3889 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 3890 %{
 3891   constraint(ALLOC_IN_RC(ptr_reg));
 3892   match(AddP (AddP reg lreg) off);
 3893
        // Operand cost is 10; the register-only and register+offset forms
        // above declare no op_cost.
 3894   op_cost(10);
 3895   format %{"[$reg + $off + $lreg]" %}
 3896   interface(MEMORY_INTER) %{
 3897     base($reg);
 3898     index($lreg);
 3899     scale(0x0);
 3900     disp($off);
 3901   %}
 3902 %}
 3903 
 3904 // Indirect Memory Plus Index Register Operand (no offset): matches
      // (AddP reg lreg); the displacement is fixed at 0x0 and the scale at 0x0.
 3905 operand indIndex(any_RegP reg, rRegL lreg)
 3906 %{
 3907   constraint(ALLOC_IN_RC(ptr_reg));
 3908   match(AddP reg lreg);
 3909
 3910   op_cost(10);
 3911   format %{"[$reg + $lreg]" %}
 3912   interface(MEMORY_INTER) %{
 3913     base($reg);
 3914     index($lreg);
 3915     scale(0x0);
 3916     disp(0x0);
 3917   %}
 3918 %}
 3919 
 3920 // Indirect Memory Plus Scaled Index Register: matches
      // (AddP reg (LShiftL lreg scale)) where scale is an immI2 immediate;
      // the shift amount is passed through as the addressing scale.
 3921 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 3922 %{
 3923   constraint(ALLOC_IN_RC(ptr_reg));
 3924   match(AddP reg (LShiftL lreg scale));
 3925
 3926   op_cost(10);
 3927   format %{"[$reg + $lreg << $scale]" %}
 3928   interface(MEMORY_INTER) %{
 3929     base($reg);
 3930     index($lreg);
 3931     scale($scale);
 3932     disp(0x0);
 3933   %}
 3934 %}
 3935 
 3936 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 3937 %{
        // Indirect Memory Plus Scaled Positive Int Index: matches
        // (AddP reg (LShiftL (ConvI2L idx) scale)) and uses the int register
        // idx directly as the index.
 3938   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the long type of node n->in(3)->in(1) has a
        // non-negative lower bound (_lo >= 0).
        // NOTE(review): presumably n->in(3)->in(1) is the ConvI2L node of the
        // match rule -- confirm against the AddP input layout.
 3939   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3940   match(AddP reg (LShiftL (ConvI2L idx) scale));
 3941
 3942   op_cost(10);
 3943   format %{"[$reg + pos $idx << $scale]" %}
 3944   interface(MEMORY_INTER) %{
 3945     base($reg);
 3946     index($idx);
 3947     scale($scale);
 3948     disp(0x0);
 3949   %}
 3950 %}
 3951 
 3952 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: matches
      // (AddP (AddP reg (LShiftL lreg scale)) off) with an immL32 offset and
      // an immI2 scale.
 3953 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 3954 %{
 3955   constraint(ALLOC_IN_RC(ptr_reg));
 3956   match(AddP (AddP reg (LShiftL lreg scale)) off);
 3957
 3958   op_cost(10);
 3959   format %{"[$reg + $off + $lreg << $scale]" %}
 3960   interface(MEMORY_INTER) %{
 3961     base($reg);
 3962     index($lreg);
 3963     scale($scale);
 3964     disp($off);
 3965   %}
 3966 %}
 3967 
 3968 // Indirect Memory Plus Positive Index Register Plus Offset Operand: matches
      // (AddP (AddP reg (ConvI2L idx)) off) and uses the int register idx
      // directly as the (unscaled) index.
 3969 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 3970 %{
 3971   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the long type of node n->in(2)->in(3) has a
        // non-negative lower bound (_lo >= 0).
        // NOTE(review): presumably n->in(2)->in(3) is the ConvI2L node of the
        // match rule -- confirm against the AddP input layout.
 3972   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 3973   match(AddP (AddP reg (ConvI2L idx)) off);
 3974
 3975   op_cost(10);
 3976   format %{"[$reg + $off + $idx]" %}
 3977   interface(MEMORY_INTER) %{
 3978     base($reg);
 3979     index($idx);
 3980     scale(0x0);
 3981     disp($off);
 3982   %}
 3983 %}
 3984 
 3985 // Indirect Memory Plus Scaled Positive Index Register Plus Offset Operand:
      // matches (AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off) and uses
      // the int register idx directly as the index.
 3986 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3987 %{
 3988   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the long type of node n->in(2)->in(3)->in(1) has
        // a non-negative lower bound (_lo >= 0).
        // NOTE(review): presumably that node is the ConvI2L of the match
        // rule -- confirm against the AddP input layout.
 3989   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3990   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3991
 3992   op_cost(10);
 3993   format %{"[$reg + $off + $idx << $scale]" %}
 3994   interface(MEMORY_INTER) %{
 3995     base($reg);
 3996     index($idx);
 3997     scale($scale);
 3998     disp($off);
 3999   %}
 4000 %}
 4001 
 4002 // Indirect Narrow Oop Operand: matches (DecodeN reg) and folds the decode
      // into the address as [R12 + reg << 3].
 4003 operand indCompressedOop(rRegN reg) %{
        // Only applies when compressed oops are in use and the compressed-oop
        // shift equals Address::times_8.
 4004   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4005   constraint(ALLOC_IN_RC(ptr_reg));
 4006   match(DecodeN reg);
 4007
 4008   op_cost(10);
 4009   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 4010   interface(MEMORY_INTER) %{
 4011     base(0xc); // R12
 4012     index($reg);
 4013     scale(0x3);
 4014     disp(0x0);
 4015   %}
 4016 %}
 4017 
 4018 // Indirect Narrow Oop Plus Offset Operand: matches (AddP (DecodeN reg) off).
 4019 // Note: the x86 architecture doesn't support "scale * index + offset"
 4020 // without a base, so we can't free r12 even with
      // CompressedOops::base() == nullptr.
 4021 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Only applies when compressed oops are in use and the compressed-oop
        // shift equals Address::times_8.
 4022   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4023   constraint(ALLOC_IN_RC(ptr_reg));
 4024   match(AddP (DecodeN reg) off);
 4025
 4026   op_cost(10);
 4027   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4028   interface(MEMORY_INTER) %{
 4029     base(0xc); // R12
 4030     index($reg);
 4031     scale(0x3);
 4032     disp($off);
 4033   %}
 4034 %}
 4035 
 4036 // Indirect Memory Operand, narrow-oop form: matches (DecodeN reg) and uses
      // the narrow register itself as the base.
 4037 operand indirectNarrow(rRegN reg)
 4038 %{
        // Only applies when the compressed-oop shift is 0.
 4039   predicate(CompressedOops::shift() == 0);
 4040   constraint(ALLOC_IN_RC(ptr_reg));
 4041   match(DecodeN reg);
 4042
 4043   format %{ "[$reg]" %}
 4044   interface(MEMORY_INTER) %{
 4045     base($reg);
          // 0x4 / 0x0 are the values the stack-slot operands in this file
          // annotate as "No Index" / "No Scale".
 4046     index(0x4);
 4047     scale(0x0);
 4048     disp(0x0);
 4049   %}
 4050 %}
 4051 
 4052 // Indirect Memory Plus Short Offset Operand, narrow-oop form: matches
      // (AddP (DecodeN reg) off) where off is an immL8 immediate.
 4053 operand indOffset8Narrow(rRegN reg, immL8 off)
 4054 %{
        // Only applies when the compressed-oop shift is 0.
 4055   predicate(CompressedOops::shift() == 0);
 4056   constraint(ALLOC_IN_RC(ptr_reg));
 4057   match(AddP (DecodeN reg) off);
 4058
 4059   format %{ "[$reg + $off (8-bit)]" %}
 4060   interface(MEMORY_INTER) %{
 4061     base($reg);
          // 0x4 / 0x0 are the values the stack-slot operands in this file
          // annotate as "No Index" / "No Scale".
 4062     index(0x4);
 4063     scale(0x0);
 4064     disp($off);
 4065   %}
 4066 %}
 4067 
 4068 // Indirect Memory Plus Long Offset Operand, narrow-oop form: matches
      // (AddP (DecodeN reg) off) where off is an immL32 immediate.
 4069 operand indOffset32Narrow(rRegN reg, immL32 off)
 4070 %{
        // Only applies when the compressed-oop shift is 0.
 4071   predicate(CompressedOops::shift() == 0);
 4072   constraint(ALLOC_IN_RC(ptr_reg));
 4073   match(AddP (DecodeN reg) off);
 4074
 4075   format %{ "[$reg + $off (32-bit)]" %}
 4076   interface(MEMORY_INTER) %{
 4077     base($reg);
          // 0x4 / 0x0 are the values the stack-slot operands in this file
          // annotate as "No Index" / "No Scale".
 4078     index(0x4);
 4079     scale(0x0);
 4080     disp($off);
 4081   %}
 4082 %}
 4083 
 4084 // Indirect Memory Plus Index Register Plus Offset Operand, narrow-oop form:
      // matches (AddP (AddP (DecodeN reg) lreg) off).
 4085 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 4086 %{
        // Only applies when the compressed-oop shift is 0.
 4087   predicate(CompressedOops::shift() == 0);
 4088   constraint(ALLOC_IN_RC(ptr_reg));
 4089   match(AddP (AddP (DecodeN reg) lreg) off);
 4090
 4091   op_cost(10);
 4092   format %{"[$reg + $off + $lreg]" %}
 4093   interface(MEMORY_INTER) %{
 4094     base($reg);
 4095     index($lreg);
 4096     scale(0x0);
 4097     disp($off);
 4098   %}
 4099 %}
 4100 
 4101 // Indirect Memory Plus Index Register Operand (no offset), narrow-oop form:
      // matches (AddP (DecodeN reg) lreg); displacement is fixed at 0x0.
 4102 operand indIndexNarrow(rRegN reg, rRegL lreg)
 4103 %{
        // Only applies when the compressed-oop shift is 0.
 4104   predicate(CompressedOops::shift() == 0);
 4105   constraint(ALLOC_IN_RC(ptr_reg));
 4106   match(AddP (DecodeN reg) lreg);
 4107
 4108   op_cost(10);
 4109   format %{"[$reg + $lreg]" %}
 4110   interface(MEMORY_INTER) %{
 4111     base($reg);
 4112     index($lreg);
 4113     scale(0x0);
 4114     disp(0x0);
 4115   %}
 4116 %}
 4117 
 4118 // Indirect Memory Plus Scaled Index Register, narrow-oop form: matches
      // (AddP (DecodeN reg) (LShiftL lreg scale)) with an immI2 scale.
 4119 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 4120 %{
        // Only applies when the compressed-oop shift is 0.
 4121   predicate(CompressedOops::shift() == 0);
 4122   constraint(ALLOC_IN_RC(ptr_reg));
 4123   match(AddP (DecodeN reg) (LShiftL lreg scale));
 4124
 4125   op_cost(10);
 4126   format %{"[$reg + $lreg << $scale]" %}
 4127   interface(MEMORY_INTER) %{
 4128     base($reg);
 4129     index($lreg);
 4130     scale($scale);
 4131     disp(0x0);
 4132   %}
 4133 %}
 4134 
 4135 // Indirect Memory Plus Scaled Index Register Plus Offset Operand,
      // narrow-oop form: matches
      // (AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off).
 4136 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 4137 %{
        // Only applies when the compressed-oop shift is 0.
 4138   predicate(CompressedOops::shift() == 0);
 4139   constraint(ALLOC_IN_RC(ptr_reg));
 4140   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 4141
 4142   op_cost(10);
 4143   format %{"[$reg + $off + $lreg << $scale]" %}
 4144   interface(MEMORY_INTER) %{
 4145     base($reg);
 4146     index($lreg);
 4147     scale($scale);
 4148     disp($off);
 4149   %}
 4150 %}
 4151 
 4152 // Indirect Memory Plus Positive Index Register Plus Offset Operand,
      // narrow-oop form: matches (AddP (AddP (DecodeN reg) (ConvI2L idx)) off)
      // and uses the int register idx directly as the (unscaled) index.
 4153 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 4154 %{
 4155   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the compressed-oop shift is 0 and the long type
        // of node n->in(2)->in(3) has a non-negative lower bound (_lo >= 0).
        // NOTE(review): presumably n->in(2)->in(3) is the ConvI2L node of the
        // match rule -- confirm against the AddP input layout.
 4156   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 4157   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 4158
 4159   op_cost(10);
 4160   format %{"[$reg + $off + $idx]" %}
 4161   interface(MEMORY_INTER) %{
 4162     base($reg);
 4163     index($idx);
 4164     scale(0x0);
 4165     disp($off);
 4166   %}
 4167 %}
 4168 
 4169 // Indirect Memory Plus Scaled Positive Index Register Plus Offset Operand,
      // narrow-oop form: matches
      // (AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off).
 4170 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 4171 %{
 4172   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the compressed-oop shift is 0 and the long type
        // of node n->in(2)->in(3)->in(1) has a non-negative lower bound.
        // NOTE(review): presumably that node is the ConvI2L of the match
        // rule -- confirm against the AddP input layout.
 4173   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 4174   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 4175
 4176   op_cost(10);
 4177   format %{"[$reg + $off + $idx << $scale]" %}
 4178   interface(MEMORY_INTER) %{
 4179     base($reg);
 4180     index($idx);
 4181     scale($scale);
 4182     disp($off);
 4183   %}
 4184 %}
 4185 
 4186 //----------Special Memory Operands--------------------------------------------
 4187 // Stack Slot Operand - This operand is used for loading and storing temporary
 4188 //                      values on the stack where a match requires a value to
 4189 //                      flow through memory.
      // stackSlotP is the variant taking an sRegP; it is addressed as
      // RSP + stack offset with no index and no scale.
 4190 operand stackSlotP(sRegP reg)
 4191 %{
 4192   constraint(ALLOC_IN_RC(stack_slots));
 4193   // No match rule because this operand is only generated in matching
 4194
 4195   format %{ "[$reg]" %}
 4196   interface(MEMORY_INTER) %{
 4197     base(0x4);   // RSP
 4198     index(0x4);  // No Index
 4199     scale(0x0);  // No Scale
 4200     disp($reg);  // Stack Offset
 4201   %}
 4202 %}
 4203 
 4204 operand stackSlotI(sRegI reg)
 4205 %{
        // Stack slot operand for an sRegI, constrained to the stack_slots
        // register class and addressed as RSP + stack offset.
 4206   constraint(ALLOC_IN_RC(stack_slots));
 4207   // No match rule because this operand is only generated in matching
 4208
 4209   format %{ "[$reg]" %}
 4210   interface(MEMORY_INTER) %{
 4211     base(0x4);   // RSP
 4212     index(0x4);  // No Index
 4213     scale(0x0);  // No Scale
 4214     disp($reg);  // Stack Offset
 4215   %}
 4216 %}
 4217 
 4218 operand stackSlotF(sRegF reg)
 4219 %{
        // Stack slot operand for an sRegF, constrained to the stack_slots
        // register class and addressed as RSP + stack offset.
 4220   constraint(ALLOC_IN_RC(stack_slots));
 4221   // No match rule because this operand is only generated in matching
 4222
 4223   format %{ "[$reg]" %}
 4224   interface(MEMORY_INTER) %{
 4225     base(0x4);   // RSP
 4226     index(0x4);  // No Index
 4227     scale(0x0);  // No Scale
 4228     disp($reg);  // Stack Offset
 4229   %}
 4230 %}
 4231 
 4232 operand stackSlotD(sRegD reg)
 4233 %{
        // Stack slot operand for an sRegD, constrained to the stack_slots
        // register class and addressed as RSP + stack offset.
 4234   constraint(ALLOC_IN_RC(stack_slots));
 4235   // No match rule because this operand is only generated in matching
 4236
 4237   format %{ "[$reg]" %}
 4238   interface(MEMORY_INTER) %{
 4239     base(0x4);   // RSP
 4240     index(0x4);  // No Index
 4241     scale(0x0);  // No Scale
 4242     disp($reg);  // Stack Offset
 4243   %}
 4244 %}
 4245 operand stackSlotL(sRegL reg)
 4246 %{
        // Stack slot operand for an sRegL, constrained to the stack_slots
        // register class and addressed as RSP + stack offset.
 4247   constraint(ALLOC_IN_RC(stack_slots));
 4248   // No match rule because this operand is only generated in matching
 4249
 4250   format %{ "[$reg]" %}
 4251   interface(MEMORY_INTER) %{
 4252     base(0x4);   // RSP
 4253     index(0x4);  // No Index
 4254     scale(0x0);  // No Scale
 4255     disp($reg);  // Stack Offset
 4256   %}
 4257 %}
 4258 
 4259 //----------Conditional Branch Operands----------------------------------------
 4260 // Comparison Op  - This is the operation of the comparison, and is limited to
 4261 //                  the following set of codes:
 4262 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4263 //
 4264 // Other attributes of the comparison, such as unsignedness, are specified
 4265 // by the comparison instruction that sets a condition code flags register.
 4266 // That result is represented by a flags operand whose subtype is appropriate
 4267 // to the unsignedness (etc.) of the comparison.
 4268 //
 4269 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4270 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4271 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4272 
 4273 // Comparison Code: matches a Bool node and maps each comparison to an
      // encoding value and a mnemonic suffix string. This variant uses the
      // l/ge/le/g suffixes for the ordering tests.
      // NOTE(review): the hex values look like x86 condition-code encodings --
      // confirm against the encoding classes that consume them.
 4274 operand cmpOp()
 4275 %{
 4276   match(Bool);
 4277
 4278   format %{ "" %}
 4279   interface(COND_INTER) %{
 4280     equal(0x4, "e");
 4281     not_equal(0x5, "ne");
 4282     less(0xC, "l");
 4283     greater_equal(0xD, "ge");
 4284     less_equal(0xE, "le");
 4285     greater(0xF, "g");
 4286     overflow(0x0, "o");
 4287     no_overflow(0x1, "no");
 4288   %}
 4289 %}
 4290 
 4291 // Comparison Code, unsigned compare.  Used by FP also, with
 4292 // C2 (unordered) turned into GT or LT already.  The other bits
 4293 // C0 and C3 are turned into Carry & Zero flags.
      // Differs from cmpOp only in the ordering tests, which use the
      // b/ae/be/a suffixes and their encodings.
 4294 operand cmpOpU()
 4295 %{
 4296   match(Bool);
 4297
 4298   format %{ "" %}
 4299   interface(COND_INTER) %{
 4300     equal(0x4, "e");
 4301     not_equal(0x5, "ne");
 4302     less(0x2, "b");
 4303     greater_equal(0x3, "ae");
 4304     less_equal(0x6, "be");
 4305     greater(0x7, "a");
 4306     overflow(0x0, "o");
 4307     no_overflow(0x1, "no");
 4308   %}
 4309 %}
 4310 
 4311 
 4312 // Floating comparisons that don't require any fixup for the unordered case.
 4313 // If both inputs of the comparison are the same, ZF is always set so we
 4314 // don't need to use cmpOpUCF2 for eq/ne
 4315 operand cmpOpUCF() %{
 4316   match(Bool);
        // Applies when the Bool test is lt, ge, le or gt, or when the two
        // inputs of the node feeding the Bool (n->in(1)) are the same node.
 4317   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4318             n->as_Bool()->_test._test == BoolTest::ge ||
 4319             n->as_Bool()->_test._test == BoolTest::le ||
 4320             n->as_Bool()->_test._test == BoolTest::gt ||
 4321             n->in(1)->in(1) == n->in(1)->in(2));
 4322   format %{ "" %}
 4323   interface(COND_INTER) %{
          // equal / not_equal use the "np" / "p" suffixes here rather than
          // the "e" / "ne" used by cmpOpU.
 4324     equal(0xb, "np");
 4325     not_equal(0xa, "p");
 4326     less(0x2, "b");
 4327     greater_equal(0x3, "ae");
 4328     less_equal(0x6, "be");
 4329     greater(0x7, "a");
 4330     overflow(0x0, "o");
 4331     no_overflow(0x1, "no");
 4332   %}
 4333 %}
 4334 
 4335 
 4336 // Floating comparisons that can be fixed up with extra conditional jumps
 4337 operand cmpOpUCF2() %{
 4338   match(Bool);
        // Applies only to ne/eq tests whose compared inputs are different
        // nodes; this is the complement of the cmpOpUCF predicate for eq/ne.
 4339   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 4340              n->as_Bool()->_test._test == BoolTest::eq) &&
 4341             n->in(1)->in(1) != n->in(1)->in(2));
 4342   format %{ "" %}
 4343   interface(COND_INTER) %{
 4344     equal(0x4, "e");
 4345     not_equal(0x5, "ne");
 4346     less(0x2, "b");
 4347     greater_equal(0x3, "ae");
 4348     less_equal(0x6, "be");
 4349     greater(0x7, "a");
 4350     overflow(0x0, "o");
 4351     no_overflow(0x1, "no");
 4352   %}
 4353 %}
 4354 
 4355 //----------OPERAND CLASSES----------------------------------------------------
 4356 // Operand Classes are groups of operands that are used to simplify
 4357 // instruction definitions by not requiring the AD writer to specify separate
 4358 // instructions for every form of operand when the instruction accepts
 4359 // multiple operand types with the same basic encoding and format.  The classic
 4360 // case of this is memory operands.
 4361
      // "memory" groups every addressing-mode operand defined above: the plain
      // forms, the compressed-oop forms, and the *Narrow forms.
 4362 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4363                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4364                indCompressedOop, indCompressedOopOffset,
 4365                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 4366                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4367                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4368 
 4369 //----------PIPELINE-----------------------------------------------------------
 4370 // Rules which define the behavior of the target architecture's pipeline.
 4371 pipeline %{
 4372 
 4373 //----------ATTRIBUTES---------------------------------------------------------
 4374 attributes %{
 4375   variable_size_instructions;        // Instructions are variable size
 4376   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4377   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 4378   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4379   instruction_fetch_units = 1;       // of 16 bytes
 4380
 4381   // List of nop instructions
 4382   nops( MachNop );
 4383 %}
 4384 
 4385 //----------RESOURCES----------------------------------------------------------
 4386 // Resources are the functional units available to the machine
 4387
 4388 // Generic P2/P3 pipeline
 4389 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4390 // 3 instructions decoded per cycle.
 4391 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4392 // 3 ALU op, only ALU0 handles mul instructions.
      // NOTE(review): the list below declares three memory units (MS0..MS2),
      // while the comment above says 2 load/store ops per cycle -- confirm
      // which is intended.
      // DECODE, MEM and ALU are aliases for "any of" their individual units.
 4393 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4394            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 4395            BR, FPU,
 4396            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 4397 
 4398 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4399 // Pipeline Description specifies the stages in the machine's pipeline
 4400
 4401 // Generic P2/P3 pipeline: six stages, S0 through S5, which the pipe_class
      // definitions below reference when placing reads, writes and resources.
 4402 pipe_desc(S0, S1, S2, S3, S4, S5);
 4403 
 4404 //----------PIPELINE CLASSES---------------------------------------------------
 4405 // Pipeline Classes describe the stages in which input and output are
 4406 // referenced by the hardware pipeline.
 4407 
 4408 // Naming convention: ialu or fpu
 4409 // Then: _reg
 4410 // Then: _reg if there is a 2nd register
 4411 // Then: _long if it's a pair of instructions implementing a long
 4412 // Then: _fat if it requires the big decoder
 4413 //   Or: _mem if it requires the big decoder and a memory unit.
 4414 
 4415 // Integer ALU reg operation: single instruction whose only operand, dst,
      // is read in S3 and written in S4.
 4416 pipe_class ialu_reg(rRegI dst)
 4417 %{
 4418     single_instruction;
 4419     dst    : S4(write);
 4420     dst    : S3(read);
 4421     DECODE : S0;        // any decoder
 4422     ALU    : S3;        // any alu
 4423 %}
 4424 
 4425 // Long ALU reg operation: counted as two instructions; dst is read in S3
      // and written in S4.
 4426 pipe_class ialu_reg_long(rRegL dst)
 4427 %{
 4428     instruction_count(2);
 4429     dst    : S4(write);
 4430     dst    : S3(read);
 4431     DECODE : S0(2);     // any 2 decoders
 4432     ALU    : S3(2);     // 2 alus (of the three declared in resources)
 4433 %}
 4434 
 4435 // Integer ALU reg operation using big decoder: same as ialu_reg except
      // that decoding is restricted to D0.
 4436 pipe_class ialu_reg_fat(rRegI dst)
 4437 %{
 4438     single_instruction;
 4439     dst    : S4(write);
 4440     dst    : S3(read);
 4441     D0     : S0;        // big decoder only
 4442     ALU    : S3;        // any alu
 4443 %}
 4444 
 4445 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4.
 4446 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 4447 %{
 4448     single_instruction;
 4449     dst    : S4(write);
 4450     src    : S3(read);
 4451     DECODE : S0;        // any decoder
 4452     ALU    : S3;        // any alu
 4453 %}
 4454 
 4455 // Integer ALU reg-reg operation using big decoder.
      // NOTE(review): despite the "reg_reg" name, src is declared as a memory
      // operand and no MEM resource is reserved -- confirm this is intended.
 4456 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 4457 %{
 4458     single_instruction;
 4459     dst    : S4(write);
 4460     src    : S3(read);
 4461     D0     : S0;        // big decoder only
 4462     ALU    : S3;        // any alu
 4463 %}
 4464 
 4465 // Integer ALU reg-mem operation: mem is read in S3 using a memory unit,
      // the ALU is used in S4, and dst is written in S5.
 4466 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 4467 %{
 4468     single_instruction;
 4469     dst    : S5(write);
 4470     mem    : S3(read);
 4471     D0     : S0;        // big decoder only
 4472     ALU    : S4;        // any alu
 4473     MEM    : S3;        // any mem
 4474 %}
 4475 
 4476 // Integer mem operation (prefetch): reads mem in S3 using a memory unit;
      // no ALU is reserved and nothing is written.
 4477 pipe_class ialu_mem(memory mem)
 4478 %{
 4479     single_instruction;
 4480     mem    : S3(read);
 4481     D0     : S0;        // big decoder only
 4482     MEM    : S3;        // any mem
 4483 %}
 4484 
 4485 // Integer Store to Memory from a register: mem is read in S3 and src is
      // read in S5; both operands are declared as reads.
 4486 pipe_class ialu_mem_reg(memory mem, rRegI src)
 4487 %{
 4488     single_instruction;
 4489     mem    : S3(read);
 4490     src    : S5(read);
 4491     D0     : S0;        // big decoder only
 4492     ALU    : S4;        // any alu
 4493     MEM    : S3;        // any mem
 4494 %}
 4495 
 4496 // // Long Store to Memory
 4497 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 4498 // %{
 4499 //     instruction_count(2);
 4500 //     mem    : S3(read);
 4501 //     src    : S5(read);
 4502 //     D0     : S0(2);          // big decoder only; twice
 4503 //     ALU    : S4(2);     // any 2 alus
 4504 //     MEM    : S3(2);  // Both mems
 4505 // %}
 4506 
 4507 // Integer Store to Memory with no register source (the "_imm" form):
      // mem is read in S3; the ALU is used in S4.
 4508 pipe_class ialu_mem_imm(memory mem)
 4509 %{
 4510     single_instruction;
 4511     mem    : S3(read);
 4512     D0     : S0;        // big decoder only
 4513     ALU    : S4;        // any alu
 4514     MEM    : S3;        // any mem
 4515 %}
 4516 
 4517 // Integer ALU0 reg-reg operation: like ialu_reg_reg but restricted to the
      // big decoder (D0) and to ALU0.
 4518 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 4519 %{
 4520     single_instruction;
 4521     dst    : S4(write);
 4522     src    : S3(read);
 4523     D0     : S0;        // Big decoder only
 4524     ALU0   : S3;        // only alu0
 4525 %}
 4526 
 4527 // Integer ALU0 reg-mem operation: like ialu_reg_mem but the ALU stage is
      // restricted to ALU0.
 4528 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 4529 %{
 4530     single_instruction;
 4531     dst    : S5(write);
 4532     mem    : S3(read);
 4533     D0     : S0;        // big decoder only
 4534     ALU0   : S4;        // ALU0 only
 4535     MEM    : S3;        // any mem
 4536 %}
 4537 
 4538 // Integer ALU reg-reg operation that writes the flags: src1 and src2 are
      // read in S3 and the flags operand cr is written in S4.
 4539 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 4540 %{
 4541     single_instruction;
 4542     cr     : S4(write);
 4543     src1   : S3(read);
 4544     src2   : S3(read);
 4545     DECODE : S0;        // any decoder
 4546     ALU    : S3;        // any alu
 4547 %}
 4548 
 4549 // Integer ALU reg-imm operation that writes the flags: src1 is read in S3
      // and the flags operand cr is written in S4.
 4550 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 4551 %{
 4552     single_instruction;
 4553     cr     : S4(write);
 4554     src1   : S3(read);
 4555     DECODE : S0;        // any decoder
 4556     ALU    : S3;        // any alu
 4557 %}
 4558 
 4559 // Integer ALU reg-mem operation that writes the flags: src1 and the memory
      // operand src2 are read in S3, the ALU is used in S4, and cr is written
      // in S4.
 4560 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 4561 %{
 4562     single_instruction;
 4563     cr     : S4(write);
 4564     src1   : S3(read);
 4565     src2   : S3(read);
 4566     D0     : S0;        // big decoder only
 4567     ALU    : S4;        // any alu
 4568     MEM    : S3;        // any mem
 4569 %}
 4570 
 4571 // pipe_cmplt: four-instruction sequence that reads p and q in S3 and y in
      // S4; no operand write and no ALU resource is declared.
      // NOTE(review): the original header here read "Conditional move reg-reg",
      // identical to pipe_cmov_reg below -- it looks copy-pasted; confirm the
      // intended description.
 4572 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 4573 %{
 4574     instruction_count(4);
 4575     y      : S4(read);
 4576     q      : S3(read);
 4577     p      : S3(read);
 4578     DECODE : S0(4);     // 4 decode slots, any decoder
 4579 %}
 4580 
 4581 // Conditional move reg-reg: src and the flags operand cr are read in S3,
      // dst is written in S4; no ALU resource is declared.
 4582 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 4583 %{
 4584     single_instruction;
 4585     dst    : S4(write);
 4586     src    : S3(read);
 4587     cr     : S3(read);
 4588     DECODE : S0;        // any decoder
 4589 %}
 4590 
 4591 // Conditional move reg-mem: the memory operand src and the flags operand cr
      // are read in S3, dst is written in S4. Note the parameter order here is
      // (cr, dst, src), unlike pipe_cmov_reg's (dst, src, cr).
 4592 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 4593 %{
 4594     single_instruction;
 4595     dst    : S4(write);
 4596     src    : S3(read);
 4597     cr     : S3(read);
 4598     DECODE : S0;        // any decoder
 4599     MEM    : S3;        // any mem
 4600 %}
 4601 
 4602 // Conditional move reg-reg long: src and cr are read in S3, dst is written
      // in S4.
      // NOTE(review): declared single_instruction yet reserves 2 decoders --
      // confirm this combination is intended.
 4603 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 4604 %{
 4605     single_instruction;
 4606     dst    : S4(write);
 4607     src    : S3(read);
 4608     cr     : S3(read);
 4609     DECODE : S0(2);     // any 2 decoders
 4610 %}
 4611 
 4612 // XXX
 4613 // // Conditional move double reg-reg
 4614 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
 4615 // %{
 4616 //     single_instruction;
 4617 //     dst    : S4(write);
 4618 //     src    : S3(read);
 4619 //     cr     : S3(read);
 4620 //     DECODE : S0;     // any decoder
 4621 // %}
 4622 
 4623 // Float single-register operation: counted as two instructions; dst is
      // declared only as a read (S3), and the FPU is used in S3.
 4624 pipe_class fpu_reg(regD dst)
 4625 %{
 4626     instruction_count(2);
 4627     dst    : S3(read);
 4628     DECODE : S0(2);     // any 2 decoders
 4629     FPU    : S3;
 4630 %}
 4631 
 4632 // Float reg-reg operation: counted as two instructions; src is read in S3
      // and dst is written in S4.
 4633 pipe_class fpu_reg_reg(regD dst, regD src)
 4634 %{
 4635     instruction_count(2);
 4636     dst    : S4(write);
 4637     src    : S3(read);
 4638     DECODE : S0(2);     // any 2 decoders
 4639     FPU    : S3;
 4640 %}
 4641 
 4642 // Float reg-reg-reg operation: counted as three instructions; src1 and src2
      // are read in S3, dst is written in S4, and the FPU is used twice in S3.
 4643 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 4644 %{
 4645     instruction_count(3);
 4646     dst    : S4(write);
 4647     src1   : S3(read);
 4648     src2   : S3(read);
 4649     DECODE : S0(3);     // any 3 decoders
 4650     FPU    : S3(2);
 4651 %}
 4652 
 4653 // Float operation with three register sources: counted as four
      // instructions; src1..src3 are read in S3 and dst is written in S4.
 4654 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 4655 %{
 4656     instruction_count(4);
 4657     dst    : S4(write);
 4658     src1   : S3(read);
 4659     src2   : S3(read);
 4660     src3   : S3(read);
 4661     DECODE : S0(4);     // 4 decode slots (only 3 decoders are declared)
 4662     FPU    : S3(2);
 4663 %}
 4664 
 4665 // Float operation with one memory source and two register sources: counted
      // as four instructions; all sources are read in S3, dst is written in S4.
 4666 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 4667 %{
 4668     instruction_count(4);
 4669     dst    : S4(write);
 4670     src1   : S3(read);
 4671     src2   : S3(read);
 4672     src3   : S3(read);
 4673     DECODE : S1(3);     // any 3 decoders
 4674     D0     : S0;        // Big decoder only
 4675     FPU    : S3(2);
 4676     MEM    : S3;        // any mem
 4677 %}
 4678 
 4679 // Float reg-mem operation: counted as two instructions; mem is read in S3,
      // the FPU is used in S4, and dst is written in S5.
 4680 pipe_class fpu_reg_mem(regD dst, memory mem)
 4681 %{
 4682     instruction_count(2);
 4683     dst    : S5(write);
 4684     mem    : S3(read);
 4685     D0     : S0;        // big decoder only
 4686     DECODE : S1;        // any decoder for FPU POP
 4687     FPU    : S4;
 4688     MEM    : S3;        // any mem
 4689 %}
 4690 
 4691 // Float reg-reg-mem operation: counted as three instructions; src1 and mem
      // are read in S3, the FPU is used in S4, and dst is written in S5.
 4692 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 4693 %{
 4694     instruction_count(3);
 4695     dst    : S5(write);
 4696     src1   : S3(read);
 4697     mem    : S3(read);
 4698     D0     : S0;        // big decoder only
 4699     DECODE : S1(2);     // any decoder for FPU POP
 4700     FPU    : S4;
 4701     MEM    : S3;        // any mem
 4702 %}
 4703 
 4704 // Float mem-reg operation: counted as two instructions; mem is read in S3
      // and src in S5; both operands are declared as reads.
 4705 pipe_class fpu_mem_reg(memory mem, regD src)
 4706 %{
 4707     instruction_count(2);
 4708     src    : S5(read);
 4709     mem    : S3(read);
 4710     DECODE : S0;        // any decoder for FPU PUSH
 4711     D0     : S1;        // big decoder only
 4712     FPU    : S4;
 4713     MEM    : S3;        // any mem
 4714 %}
 4715 
// Float mem-reg-reg operation: memory operand plus two register sources,
// all read in stage S3. Declared as three instructions.
pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 4727 
// Float mem-reg-mem operation: memory operand `mem` (read in S4), a register
// source and a second memory source (both read in S3). Two memory operands
// are reflected in the doubled D0 and MEM reservations.
// Declared as three instructions.
pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
 4739 
// Float mem-mem operation: memory source (read in S3) and memory destination
// operand (declared as a read in S4). Unlike the neighbouring classes this
// one reserves no FPU or DECODE resource, only D0 and MEM.
// Declared as two instructions.
pipe_class fpu_mem_mem(memory dst, memory src1)
%{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}
 4748 
// Float mem-mem-mem operation: two memory sources (read in S3) and a memory
// destination operand (declared as a read in S4).
// Declared as three instructions with tripled D0 and MEM reservations.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}
 4759 
// Float mem-reg-constant operation: memory operand and one register source,
// both read in S4. The constant has no operand in the signature; the second
// MEM/D0 unit presumably accounts for loading it -- confirm.
// Declared as three instructions.
pipe_class fpu_mem_reg_con(memory mem, regD src1)
%{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
 4770 
// Float load constant: register destination written in S5. The constant is
// not an operand of the class; a MEM unit is reserved for the load.
// Declared as two instructions.
pipe_class fpu_reg_con(regD dst)
%{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 4781 
// Float reg-reg operation with a loaded constant: register destination
// (written in S5) and one register source (read in S3); a MEM unit is
// reserved for the constant load. Declared as three instructions.
pipe_class fpu_reg_reg_con(regD dst, regD src)
%{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 4793 
// UnConditional branch: a single instruction that uses the BR resource in
// stage S3. The label operand has no read/write stage declared.
pipe_class pipe_jmp(label labl)
%{
    single_instruction;
    BR   : S3;
%}
 4800 
// Conditional branch: a single instruction that reads the flags register in
// stage S1 and uses the BR resource in stage S3.
pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
%{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}
 4808 
// Allocation idiom (compare-and-exchange on a heap pointer).
// Modelled as one serializing instruction with a fixed latency of 6:
// heap_ptr is read in S3 and dst is written in S5.
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}
 4822 
// Generic big/slow expanded idiom.
// Catch-all class with no operands: ten instructions spanning multiple
// bundles, serializing, with a fixed latency of 100. Many instructs in this
// file use it where no more precise class is given (often tagged "XXX").
pipe_class pipe_slow()
%{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}
 4831 
// The real do-nothing guy: zero instructions, no operands and no resources.
// Bound to MachNop by the `define` block that follows.
pipe_class empty()
%{
    instruction_count(0);
%}
 4837 
// Define the class for the Nop node: MachNop is scheduled with the `empty`
// pipe class declared just above.
define
%{
   MachNop = empty;
%}
 4843 
 4844 %}
 4845 
 4846 //----------INSTRUCTIONS-------------------------------------------------------
 4847 //
 4848 // match      -- States which machine-independent subtree may be replaced
 4849 //               by this instruction.
 4850 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4851 //               selection to identify a minimum cost tree of machine
 4852 //               instructions that matches a tree of machine-independent
 4853 //               instructions.
 4854 // format     -- A string providing the disassembly for this instruction.
 4855 //               The value of an instruction's operand may be inserted
 4856 //               by referring to it with a '$' prefix.
 4857 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4858 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 4860 //               indicate the type of machine instruction, while secondary
 4861 //               and tertiary are often used for prefix options or addressing
 4862 //               modes.
 4863 // ins_encode -- A list of encode classes with parameters. The encode class
 4864 //               name must have been defined in an 'enc_class' specification
 4865 //               in the encode section of the architecture description.
 4866 
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Move Float: regF -> vlRegF register class.
// The encoding is ShouldNotReachHere(), i.e. this node is never expected to
// reach code emission.
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 4877 
// Move Float: regF -> legRegF register class (dummy move, see group header).
// The encoding is ShouldNotReachHere(), i.e. this node is never expected to
// reach code emission.
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 4887 
// Move Float: vlRegF -> regF register class (dummy move, see group header).
// The encoding is ShouldNotReachHere(), i.e. this node is never expected to
// reach code emission.
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 4897 
// Move Float: legRegF -> regF register class (dummy move, see group header).
// The encoding is ShouldNotReachHere(), i.e. this node is never expected to
// reach code emission.
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 4907 
// Move Double: regD -> vlRegD register class (dummy move, see group header).
// The encoding is ShouldNotReachHere(), i.e. this node is never expected to
// reach code emission.
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 4917 
// Move Double: regD -> legRegD register class (dummy move, see group header).
// The encoding is ShouldNotReachHere(), i.e. this node is never expected to
// reach code emission.
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 4927 
// Move Double: vlRegD -> regD register class (dummy move, see group header).
// The encoding is ShouldNotReachHere(), i.e. this node is never expected to
// reach code emission.
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 4937 
// Move Double: legRegD -> regD register class (dummy move, see group header).
// The encoding is ShouldNotReachHere(), i.e. this node is never expected to
// reach code emission.
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 4947 
 4948 //----------Load/Store/Move Instructions---------------------------------------
 4949 //----------Load Instructions--------------------------------------------------
 4950 
// Load Byte (8 bit signed)
// Matches LoadB into an int register and emits a single movsbl
// (byte load sign-extended to 32 bits).
instruct loadB(rRegI dst, memory mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "movsbl  $dst, $mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 4965 
// Load Byte (8 bit signed) into Long Register
// Folds ConvI2L(LoadB) into a single movsbq, so the widening to 64 bits
// happens as part of the load.
instruct loadB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(125);
  format %{ "movsbq  $dst, $mem\t# byte -> long" %}

  ins_encode %{
    __ movsbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 4980 
// Load Unsigned Byte (8 bit UNsigned)
// Matches LoadUB into an int register and emits a single movzbl
// (byte load zero-extended to 32 bits).
instruct loadUB(rRegI dst, memory mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "movzbl  $dst, $mem\t# ubyte" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 4995 
// Load Unsigned Byte (8 bit UNsigned) into Long Register
// Folds ConvI2L(LoadUB) into a single movzbq.
instruct loadUB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(125);
  format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}

  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 5010 
 5011 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 5012 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 5013   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5014   effect(KILL cr);
 5015 
 5016   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 5017             "andl    $dst, right_n_bits($mask, 8)" %}
 5018   ins_encode %{
 5019     Register Rdst = $dst$$Register;
 5020     __ movzbq(Rdst, $mem$$Address);
 5021     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5022   %}
 5023   ins_pipe(ialu_reg_mem);
 5024 %}
 5025 
// Load Short (16 bit signed)
// Matches LoadS into an int register and emits a single movswl
// (16-bit load sign-extended to 32 bits).
instruct loadS(rRegI dst, memory mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "movswl $dst, $mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 5040 
// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches the (LoadS << 24) >> 24 narrowing idiom and replaces it with a
// sign-extending byte load from the same address (the low byte comes first
// in memory on little-endian x86).
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 5052 
// Load Short (16 bit signed) into Long Register
// Folds ConvI2L(LoadS) into a single movswq.
instruct loadS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(125);
  format %{ "movswq $dst, $mem\t# short -> long" %}

  ins_encode %{
    __ movswq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 5067 
// Load Unsigned Short/Char (16 bit UNsigned)
// Matches LoadUS into an int register and emits a single movzwl
// (16-bit load zero-extended to 32 bits).
instruct loadUS(rRegI dst, memory mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "movzwl  $dst, $mem\t# ushort/char" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 5082 
// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Matches the (LoadUS << 24) >> 24 narrowing idiom and replaces it with a
// sign-extending byte load from the same address (the low byte comes first
// in memory on little-endian x86).
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 5094 
// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// Folds ConvI2L(LoadUS) into a single movzwq.
instruct loadUS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(125);
  format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}

  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 5109 
// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// The 0xFF mask keeps only the low byte, so the whole
// ConvI2L(AndI(LoadUS, 0xFF)) tree becomes a single zero-extending byte load
// and no separate AND is emitted. No ins_cost is given here.
instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));

  format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 5120 
 5121 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 5122 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 5123   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5124   effect(KILL cr);
 5125 
 5126   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5127             "andl    $dst, right_n_bits($mask, 16)" %}
 5128   ins_encode %{
 5129     Register Rdst = $dst$$Register;
 5130     __ movzwq(Rdst, $mem$$Address);
 5131     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5132   %}
 5133   ins_pipe(ialu_reg_mem);
 5134 %}
 5135 
// Load Integer
// Matches LoadI into an int register and emits a single 32-bit movl.
instruct loadI(rRegI dst, memory mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "movl    $dst, $mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 5150 
// Load Integer (32 bit signed) to Byte (8 bit signed)
// Matches the (LoadI << 24) >> 24 narrowing idiom and replaces it with a
// sign-extending byte load from the same address (the low byte comes first
// in memory on little-endian x86).
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 5162 
// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// Matches LoadI & 0xFF and replaces load + AND with a single zero-extending
// byte load from the same address.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 5174 
// Load Integer (32 bit signed) to Short (16 bit signed)
// Matches the (LoadI << 16) >> 16 narrowing idiom and replaces it with a
// sign-extending 16-bit load from the same address.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "movswl  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 5186 
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// Matches LoadI & 0xFFFF and replaces load + AND with a single
// zero-extending 16-bit load from the same address.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 5198 
// Load Integer into Long Register
// Folds ConvI2L(LoadI) into a single movslq (32-bit load sign-extended to
// 64 bits).
instruct loadI2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));

  ins_cost(125);
  format %{ "movslq  $dst, $mem\t# int -> long" %}

  ins_encode %{
    __ movslq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 5213 
// Load Integer with mask 0xFF into Long Register
// ConvI2L(AndI(LoadI, 0xFF)) becomes a single zero-extending byte load; no
// separate AND is emitted. No ins_cost is given here.
instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 5224 
// Load Integer with mask 0xFFFF into Long Register
// ConvI2L(AndI(LoadI, 0xFFFF)) becomes a single zero-extending 16-bit load;
// no separate AND is emitted. No ins_cost is given here.
instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 5235 
 5236 // Load Integer with a 31-bit mask into Long Register
 5237 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 5238   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5239   effect(KILL cr);
 5240 
 5241   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 5242             "andl    $dst, $mask" %}
 5243   ins_encode %{
 5244     Register Rdst = $dst$$Register;
 5245     __ movl(Rdst, $mem$$Address);
 5246     __ andl(Rdst, $mask$$constant);
 5247   %}
 5248   ins_pipe(ialu_reg_mem);
 5249 %}
 5250 
// Load Unsigned Integer into Long Register
// Matches AndL(ConvI2L(LoadI), mask) and emits only a 32-bit movl; on x86-64
// a 32-bit register write clears the upper 32 bits, so no AND is emitted.
// (immL_32bits is defined elsewhere; presumably it is the constant
// 0xFFFFFFFF -- confirm.)
instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(125);
  format %{ "movl    $dst, $mem\t# uint -> long" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 5265 
// Load Long
// Matches LoadL into a long register and emits a single 64-bit movq.
instruct loadL(rRegL dst, memory mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(125);
  format %{ "movq    $dst, $mem\t# long" %}

  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem); // XXX
%}
 5280 
// Load Range
// Matches LoadRange into an int register and emits a 32-bit movl.
instruct loadRange(rRegI dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(125); // XXX
  format %{ "movl    $dst, $mem\t# range" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 5293 
// Load Pointer
// Matches LoadP with a 64-bit movq, but only for load nodes whose
// barrier_data() is 0. Loads carrying barrier data are rejected by the
// predicate (presumably so GC-specific rules elsewhere can match them --
// confirm).
instruct loadP(rRegP dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->barrier_data() == 0);

  ins_cost(125); // XXX
  format %{ "movq    $dst, $mem\t# ptr" %}
  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
 5307 
// Load Compressed Pointer
// Matches LoadN into a narrow-oop register and emits a 32-bit movl; the
// value is loaded as-is, no decoding happens here.
instruct loadN(rRegN dst, memory mem)
%{
   match(Set dst (LoadN mem));

   ins_cost(125); // XXX
   format %{ "movl    $dst, $mem\t# compressed ptr" %}
   ins_encode %{
     __ movl($dst$$Register, $mem$$Address);
   %}
   ins_pipe(ialu_reg_mem); // XXX
%}
 5320 
 5321 
// Load Klass Pointer
// Matches LoadKlass into a pointer register and emits a 64-bit movq.
instruct loadKlass(rRegP dst, memory mem)
%{
  match(Set dst (LoadKlass mem));

  ins_cost(125); // XXX
  format %{ "movq    $dst, $mem\t# class" %}
  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
 5334 
// Load narrow Klass Pointer
// Matches LoadNKlass into a narrow register and emits a 32-bit movl; the
// value is loaded as-is, no decoding happens here.
instruct loadNKlass(rRegN dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));

  ins_cost(125); // XXX
  format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
 5347 
// Load Float
// Matches LoadF into an XMM register via the movflt macro-assembler helper.
// Costed slightly above the integer loads (145 vs 125) and scheduled with
// the generic pipe_slow class.
instruct loadF(regF dst, memory mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(145); // XXX
  format %{ "movss   $dst, $mem\t# float" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 5360 
// Load Double (variant selected when UseXmmLoadAndClearUpper is false)
// Same encoding call as loadD (movdbl); only the predicate and the printed
// mnemonic (movlpd) differ. NOTE(review): the movdbl helper presumably picks
// the actual instruction from the same flag -- confirm in the macro assembler.
instruct loadD_partial(regD dst, memory mem)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movlpd  $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 5374 
// Load Double (variant selected when UseXmmLoadAndClearUpper is true)
// Same encoding call as loadD_partial (movdbl); only the predicate and the
// printed mnemonic (movsd) differ. NOTE(review): the movdbl helper presumably
// picks the actual instruction from the same flag -- confirm in the macro
// assembler.
instruct loadD(regD dst, memory mem)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movsd   $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 5387 
 5388 
 5389 // Following pseudo code describes the algorithm for max[FD]:
 5390 // Min algorithm is on similar lines
 5391 //  btmp = (b < +0.0) ? a : b
 5392 //  atmp = (b < +0.0) ? b : a
 5393 //  Tmp  = Max_Float(atmp , btmp)
 5394 //  Res  = (atmp == NaN) ? atmp : Tmp
 5395 
// max = java.lang.Math.max(float a, float b)
// AVX, non-reduction variant. Implements the pseudo code above with three
// temporaries; all operands are in the legRegF class.
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{
     "vblendvps        $btmp,$b,$a,$b           \n\t"
     "vblendvps        $atmp,$a,$b,$b           \n\t"
     "vmaxss           $tmp,$atmp,$btmp         \n\t"
     "vcmpps.unordered $btmp,$atmp,$atmp        \n\t"
     "vblendvps        $dst,$tmp,$atmp,$btmp    \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    // btmp = (b < +0.0) ? a : b   -- blend controlled by b
    __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
    // atmp = (b < +0.0) ? b : a   -- the opposite selection
    __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
    // tmp = Max_Float(atmp, btmp)
    __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    // btmp is reused as the NaN mask: atmp compared with itself.
    // NOTE(review): the format string calls this compare "unordered" while the
    // predicate passed is Assembler::_false -- presumably the same encoding; confirm.
    __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    // dst = (atmp == NaN) ? atmp : tmp
    __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 %}
  ins_pipe( pipe_slow );
%}
 5418 
 5419 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
 5420   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5421   match(Set dst (MaxF a b));
 5422   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5423 
 5424   format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
 5425   ins_encode %{
 5426     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5427                     false /*min*/, true /*single*/);
 5428   %}
 5429   ins_pipe( pipe_slow );
 5430 %}
 5431 
// max = java.lang.Math.max(double a, double b)
// AVX, non-reduction variant. Double-precision counterpart of maxF_reg:
// same five-step sequence using the pd/sd instruction forms.
instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
  predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
  match(Set dst (MaxD a b));
  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
  format %{
     "vblendvpd        $btmp,$b,$a,$b            \n\t"
     "vblendvpd        $atmp,$a,$b,$b            \n\t"
     "vmaxsd           $tmp,$atmp,$btmp          \n\t"
     "vcmppd.unordered $btmp,$atmp,$atmp         \n\t"
     "vblendvpd        $dst,$tmp,$atmp,$btmp     \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    // btmp = (b < +0.0) ? a : b   -- blend controlled by b
    __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
    // atmp = (b < +0.0) ? b : a   -- the opposite selection
    __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
    // tmp = Max_Double(atmp, btmp)
    __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    // btmp is reused as the NaN mask: atmp compared with itself.
    // NOTE(review): the format string calls this compare "unordered" while the
    // predicate passed is Assembler::_false -- presumably the same encoding; confirm.
    __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    // dst = (atmp == NaN) ? atmp : tmp
    __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
 5454 
 5455 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
 5456   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5457   match(Set dst (MaxD a b));
 5458   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5459 
 5460   format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
 5461   ins_encode %{
 5462     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5463                     false /*min*/, false /*single*/);
 5464   %}
 5465   ins_pipe( pipe_slow );
 5466 %}
 5467 
// min = java.lang.Math.min(float a, float b)
// AVX, non-reduction variant. Mirrors maxF_reg, except that the initial
// blends are controlled by a instead of b and vminss replaces vmaxss.
instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{
     "vblendvps        $atmp,$a,$b,$a             \n\t"
     "vblendvps        $btmp,$b,$a,$a             \n\t"
     "vminss           $tmp,$atmp,$btmp           \n\t"
     "vcmpps.unordered $btmp,$atmp,$atmp          \n\t"
     "vblendvps        $dst,$tmp,$atmp,$btmp      \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    // atmp = (a < +0.0) ? b : a   -- blend controlled by a
    __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
    // btmp = (a < +0.0) ? a : b   -- the opposite selection
    __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
    // tmp = Min_Float(atmp, btmp)
    __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    // btmp is reused as the NaN mask: atmp compared with itself.
    // NOTE(review): the format string calls this compare "unordered" while the
    // predicate passed is Assembler::_false -- presumably the same encoding; confirm.
    __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    // dst = (atmp == NaN) ? atmp : tmp
    __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
 5490 
 5491 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
 5492   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5493   match(Set dst (MinF a b));
 5494   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5495 
 5496   format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
 5497   ins_encode %{
 5498     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5499                     true /*min*/, true /*single*/);
 5500   %}
 5501   ins_pipe( pipe_slow );
 5502 %}
 5503 
// min = java.lang.Math.min(double a, double b)
// AVX, non-reduction variant. Double-precision counterpart of minF_reg:
// same five-step sequence using the pd/sd instruction forms.
instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
  predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{
     "vblendvpd        $atmp,$a,$b,$a           \n\t"
     "vblendvpd        $btmp,$b,$a,$a           \n\t"
     "vminsd           $tmp,$atmp,$btmp         \n\t"
     "vcmppd.unordered $btmp,$atmp,$atmp        \n\t"
     "vblendvpd        $dst,$tmp,$atmp,$btmp    \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    // atmp = (a < +0.0) ? b : a   -- blend controlled by a
    __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
    // btmp = (a < +0.0) ? a : b   -- the opposite selection
    __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
    // tmp = Min_Double(atmp, btmp)
    __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    // btmp is reused as the NaN mask: atmp compared with itself.
    // NOTE(review): the format string calls this compare "unordered" while the
    // predicate passed is Assembler::_false -- presumably the same encoding; confirm.
    __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    // dst = (atmp == NaN) ? atmp : tmp
    __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
 5526 
 5527 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
 5528   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5529   match(Set dst (MinD a b));
 5530   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5531 
 5532   format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
 5533   ins_encode %{
 5534     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5535                     true /*min*/, false /*single*/);
 5536   %}
 5537   ins_pipe( pipe_slow );
 5538 %}
 5539 
// Load Effective Address
// Materializes an indOffset8 address expression into a pointer register
// with a single leaq. Cost 110 is below the 125 used by the memory loads.
instruct leaP8(rRegP dst, indOffset8 mem)
%{
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq    $dst, $mem\t# ptr 8" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5552 
// Load Effective Address: materializes an indOffset32 address expression
// into a pointer register with a single leaq.
instruct leaP32(rRegP dst, indOffset32 mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr 32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5564 
// Load Effective Address: materializes an indIndexOffset address expression
// into a pointer register with a single leaq.
instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5576 
// Load Effective Address: materializes an indIndexScale address expression
// into a pointer register with a single leaq.
instruct leaPIdxScale(rRegP dst, indIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5588 
 5589 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 5590 %{
 5591   match(Set dst mem);
 5592 
 5593   ins_cost(110);
 5594   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 5595   ins_encode %{
 5596     __ leaq($dst$$Register, $mem$$Address);
 5597   %}
 5598   ins_pipe(ialu_reg_reg_fat);
 5599 %}
 5600 
// Load Effective Address: materializes an indIndexScaleOffset address
// expression into a pointer register with a single leaq.
instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5612 
// Load Effective Address: materializes an indPosIndexOffset address
// expression into a pointer register with a single leaq.
instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5624 
// Load Effective Address: materializes an indPosIndexScaleOffset address
// expression into a pointer register with a single leaq.
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5636 
// Load Effective Address which uses Narrow (32-bits) oop
// Only selected when compressed oops are enabled with a non-zero shift;
// the zero-shift case is covered by the *Narrow rules that follow.
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
%{
  predicate(UseCompressedOops && (CompressedOops::shift() != 0));
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5650 
// Load Effective Address from an indOffset8Narrow address expression.
// Only selected when CompressedOops::shift() is 0.
instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5663 
// Load Effective Address from an indOffset32Narrow address expression.
// Only selected when CompressedOops::shift() is 0.
instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5676 
 5677 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 5678 %{
        // leaq over an indIndexOffsetNarrow operand. The predicate restricts this
        // rule to configurations where the compressed-oop shift is zero.
 5679   predicate(CompressedOops::shift() == 0);
 5680   match(Set dst mem);
 5681
 5682   ins_cost(110);
 5683   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 5684   ins_encode %{
 5685     __ leaq($dst$$Register, $mem$$Address);
 5686   %}
 5687   ins_pipe(ialu_reg_reg_fat);
 5688 %}
 5689 
 5690 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 5691 %{
        // leaq over an indIndexScaleNarrow operand. The predicate restricts this
        // rule to configurations where the compressed-oop shift is zero.
 5692   predicate(CompressedOops::shift() == 0);
 5693   match(Set dst mem);
 5694
 5695   ins_cost(110);
 5696   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 5697   ins_encode %{
 5698     __ leaq($dst$$Register, $mem$$Address);
 5699   %}
 5700   ins_pipe(ialu_reg_reg_fat);
 5701 %}
 5702 
 5703 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 5704 %{
        // leaq over an indIndexScaleOffsetNarrow operand. The predicate restricts
        // this rule to configurations where the compressed-oop shift is zero.
 5705   predicate(CompressedOops::shift() == 0);
 5706   match(Set dst mem);
 5707
 5708   ins_cost(110);
 5709   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 5710   ins_encode %{
 5711     __ leaq($dst$$Register, $mem$$Address);
 5712   %}
 5713   ins_pipe(ialu_reg_reg_fat);
 5714 %}
 5715 
 5716 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 5717 %{
        // leaq over an indPosIndexOffsetNarrow operand. The predicate restricts
        // this rule to configurations where the compressed-oop shift is zero.
 5718   predicate(CompressedOops::shift() == 0);
 5719   match(Set dst mem);
 5720
 5721   ins_cost(110);
 5722   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 5723   ins_encode %{
 5724     __ leaq($dst$$Register, $mem$$Address);
 5725   %}
 5726   ins_pipe(ialu_reg_reg_fat);
 5727 %}
 5728 
 5729 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 5730 %{
        // leaq over an indPosIndexScaleOffsetNarrow operand. The predicate restricts
        // this rule to configurations where the compressed-oop shift is zero.
 5731   predicate(CompressedOops::shift() == 0);
 5732   match(Set dst mem);
 5733
 5734   ins_cost(110);
 5735   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 5736   ins_encode %{
 5737     __ leaq($dst$$Register, $mem$$Address);
 5738   %}
 5739   ins_pipe(ialu_reg_reg_fat);
 5740 %}
 5741 
 5742 instruct loadConI(rRegI dst, immI src)
 5743 %{
        // Generic int constant load: movl of the immI constant into $dst.
        // No explicit ins_cost is declared for this rule.
 5744   match(Set dst src);
 5745
 5746   format %{ "movl    $dst, $src\t# int" %}
 5747   ins_encode %{
 5748     __ movl($dst$$Register, $src$$constant);
 5749   %}
 5750   ins_pipe(ialu_reg_fat); // XXX
 5751 %}
 5752 
 5753 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 5754 %{
        // Int zero: materialized as xorl $dst, $dst at cost 50. The xor rewrites
        // the flags, hence the cr operand and KILL effect.
 5755   match(Set dst src);
 5756   effect(KILL cr);
 5757
 5758   ins_cost(50);
 5759   format %{ "xorl    $dst, $dst\t# int" %}
 5760   ins_encode %{
 5761     __ xorl($dst$$Register, $dst$$Register);
 5762   %}
 5763   ins_pipe(ialu_reg);
 5764 %}
 5765 
 5766 instruct loadConL(rRegL dst, immL src)
 5767 %{
        // Generic long constant load via mov64 (cost 150); the cheaper 0 / 32-bit
        // special cases are separate rules with lower ins_cost.
 5768   match(Set dst src);
 5769
 5770   ins_cost(150);
 5771   format %{ "movq    $dst, $src\t# long" %}
 5772   ins_encode %{
 5773     __ mov64($dst$$Register, $src$$constant);
 5774   %}
 5775   ins_pipe(ialu_reg);
 5776 %}
 5777 
 5778 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 5779 %{
        // Long zero: a 32-bit xorl is emitted; on x86-64 a 32-bit register write
        // zero-extends, so the full 64-bit register is cleared. Flags are killed.
 5780   match(Set dst src);
 5781   effect(KILL cr);
 5782
 5783   ins_cost(50);
 5784   format %{ "xorl    $dst, $dst\t# long" %}
 5785   ins_encode %{
 5786     __ xorl($dst$$Register, $dst$$Register);
 5787   %}
 5788   ins_pipe(ialu_reg); // XXX
 5789 %}
 5790 
 5791 instruct loadConUL32(rRegL dst, immUL32 src)
 5792 %{
        // Long constant accepted by the immUL32 operand: loaded with a 32-bit movl
        // (the upper half of $dst is implicitly zeroed by the 32-bit write).
 5793   match(Set dst src);
 5794
 5795   ins_cost(60);
 5796   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 5797   ins_encode %{
 5798     __ movl($dst$$Register, $src$$constant);
 5799   %}
 5800   ins_pipe(ialu_reg);
 5801 %}
 5802 
 5803 instruct loadConL32(rRegL dst, immL32 src)
 5804 %{
        // Long constant accepted by the immL32 operand: loaded with movq taking an
        // immediate (cost 70, between the UL32 and full 64-bit forms).
 5805   match(Set dst src);
 5806
 5807   ins_cost(70);
 5808   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 5809   ins_encode %{
 5810     __ movq($dst$$Register, $src$$constant);
 5811   %}
 5812   ins_pipe(ialu_reg);
 5813 %}
 5814 
 5815 instruct loadConP(rRegP dst, immP con) %{
        // Generic pointer constant load: a 64-bit immediate move that also passes
        // the constant's relocation type and the RELOC_IMM64 format to mov64.
 5816   match(Set dst con);
 5817
 5818   format %{ "movq    $dst, $con\t# ptr" %}
 5819   ins_encode %{
 5820     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 5821   %}
 5822   ins_pipe(ialu_reg_fat); // XXX
 5823 %}
 5824 
 5825 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 5826 %{
        // Null pointer constant: cleared with xorl $dst, $dst (32-bit write
        // zero-extends to 64 bits). The xor rewrites flags, so cr is KILLed.
 5827   match(Set dst src);
 5828   effect(KILL cr);
 5829
 5830   ins_cost(50);
 5831   format %{ "xorl    $dst, $dst\t# ptr" %}
 5832   ins_encode %{
 5833     __ xorl($dst$$Register, $dst$$Register);
 5834   %}
 5835   ins_pipe(ialu_reg);
 5836 %}
 5837 
 5838 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 5839 %{
        // Pointer constant accepted by immP31 (per the format text, a positive
        // 32-bit value): loaded with a 32-bit movl.
        // NOTE(review): movl with an immediate does not modify flags, so the
        // KILL cr effect looks unnecessary here -- confirm before removing.
 5840   match(Set dst src);
 5841   effect(KILL cr);
 5842
 5843   ins_cost(60);
 5844   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 5845   ins_encode %{
 5846     __ movl($dst$$Register, $src$$constant);
 5847   %}
 5848   ins_pipe(ialu_reg);
 5849 %}
 5850 
 5851 instruct loadConF(regF dst, immF con) %{
        // Float constant: loaded with movflt from the constant-table slot that
        // $constantaddress($con) resolves to.
 5852   match(Set dst con);
 5853   ins_cost(125);
 5854   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 5855   ins_encode %{
 5856     __ movflt($dst$$XMMRegister, $constantaddress($con));
 5857   %}
 5858   ins_pipe(pipe_slow);
 5859 %}
 5860 
 5861 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
        // Compressed null constant: cleared with xorq $dst, $dst. The xor rewrites
        // flags, so cr is KILLed. The format string names $dst twice to mirror the
        // instruction actually emitted (it previously printed the $src constant).
 5862   match(Set dst src);
 5863   effect(KILL cr);
 5864   format %{ "xorq    $dst, $dst\t# compressed nullptr ptr" %}
 5865   ins_encode %{
 5866     __ xorq($dst$$Register, $dst$$Register);
 5867   %}
 5868   ins_pipe(ialu_reg);
 5869 %}
 5870 
 5871 instruct loadConN(rRegN dst, immN src) %{
        // Non-null compressed oop constant: emitted through set_narrow_oop.
        // A null constant is treated as unreachable in this rule (presumably the
        // immN0 rule is expected to have matched it -- verify).
 5872   match(Set dst src);
 5873
 5874   ins_cost(125);
 5875   format %{ "movl    $dst, $src\t# compressed ptr" %}
 5876   ins_encode %{
 5877     address oop_addr = (address)$src$$constant;
 5878     if (oop_addr != nullptr) {
 5879       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 5880     } else {
 5881       ShouldNotReachHere();
 5882     }
 5883   %}
 5884   ins_pipe(ialu_reg_fat); // XXX
 5885 %}
 5886 
 5887 instruct loadConNKlass(rRegN dst, immNKlass src) %{
        // Compressed klass constant: emitted through set_narrow_klass.
        // A null constant is treated as unreachable in this rule.
 5888   match(Set dst src);
 5889
 5890   ins_cost(125);
 5891   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 5892   ins_encode %{
 5893     address klass_addr = (address)$src$$constant;
 5894     if (klass_addr != nullptr) {
 5895       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 5896     } else {
 5897       ShouldNotReachHere();
 5898     }
 5899   %}
 5900   ins_pipe(ialu_reg_fat); // XXX
 5901 %}
 5902 
 5903 instruct loadConF0(regF dst, immF0 src)
 5904 %{
        // Float 0.0: produced by xorps of $dst with itself instead of a
        // constant-table load (cost 100 vs. 125 for loadConF).
 5905   match(Set dst src);
 5906   ins_cost(100);
 5907
 5908   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 5909   ins_encode %{
 5910     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 5911   %}
 5912   ins_pipe(pipe_slow);
 5913 %}
 5914 
 5915 // Use the same format since predicate() can not be used here.
 5916 instruct loadConD(regD dst, immD con) %{
        // Double constant: loaded with movdbl from the constant-table slot that
        // $constantaddress($con) resolves to.
 5917   match(Set dst con);
 5918   ins_cost(125);
 5919   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 5920   ins_encode %{
 5921     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 5922   %}
 5923   ins_pipe(pipe_slow);
 5924 %}
 5925 
 5926 instruct loadConD0(regD dst, immD0 src)
 5927 %{
        // Double 0.0: produced by xorpd of $dst with itself instead of a
        // constant-table load (cost 100 vs. 125 for loadConD).
 5928   match(Set dst src);
 5929   ins_cost(100);
 5930
 5931   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 5932   ins_encode %{
 5933     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 5934   %}
 5935   ins_pipe(pipe_slow);
 5936 %}
 5937 
 5938 instruct loadSSI(rRegI dst, stackSlotI src)
 5939 %{
        // Int reload from a stack slot. Encoded from opcode 0x8B plus the
        // REX_reg_mem / OpcP / reg_mem encoding classes (declared outside this
        // chunk) rather than through a MacroAssembler call.
 5940   match(Set dst src);
 5941
 5942   ins_cost(125);
 5943   format %{ "movl    $dst, $src\t# int stk" %}
 5944   opcode(0x8B);
 5945   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
 5946   ins_pipe(ialu_reg_mem);
 5947 %}
 5948 
 5949 instruct loadSSL(rRegL dst, stackSlotL src)
 5950 %{
        // Long reload from a stack slot. Same opcode 0x8B as the int form, but
        // with the REX_reg_mem_wide encoding class for the 64-bit operand size.
 5951   match(Set dst src);
 5952
 5953   ins_cost(125);
 5954   format %{ "movq    $dst, $src\t# long stk" %}
 5955   opcode(0x8B);
 5956   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
 5957   ins_pipe(ialu_reg_mem);
 5958 %}
 5959 
 5960 instruct loadSSP(rRegP dst, stackSlotP src)
 5961 %{
        // Pointer reload from a stack slot: opcode 0x8B with the
        // REX_reg_mem_wide encoding class (64-bit move).
 5962   match(Set dst src);
 5963
 5964   ins_cost(125);
 5965   format %{ "movq    $dst, $src\t# ptr stk" %}
 5966   opcode(0x8B);
 5967   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
 5968   ins_pipe(ialu_reg_mem);
 5969 %}
 5970 
 5971 instruct loadSSF(regF dst, stackSlotF src)
 5972 %{
        // Float reload from a stack slot: movflt from [rsp + slot displacement]
        // into the XMM destination.
 5973   match(Set dst src);
 5974
 5975   ins_cost(125);
 5976   format %{ "movss   $dst, $src\t# float stk" %}
 5977   ins_encode %{
 5978     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 5979   %}
 5980   ins_pipe(pipe_slow); // XXX
 5981 %}
 5982 
 5983 // Use the same format since predicate() can not be used here.
 5984 instruct loadSSD(regD dst, stackSlotD src)
 5985 %{
        // Double reload from a stack slot: movdbl from [rsp + slot displacement]
        // into the XMM destination.
 5986   match(Set dst src);
 5987
 5988   ins_cost(125);
 5989   format %{ "movsd   $dst, $src\t# double stk" %}
 5990   ins_encode  %{
 5991     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 5992   %}
 5993   ins_pipe(pipe_slow); // XXX
 5994 %}
 5995 
 5996 // Prefetch instructions for allocation.
 5997 // Must be safe to execute with invalid address (cannot fault).
 5998 
 5999 instruct prefetchAlloc( memory mem ) %{
        // PrefetchAllocation variant chosen when AllocatePrefetchInstr == 3:
        // emits prefetchw on the operand address.
 6000   predicate(AllocatePrefetchInstr==3);
 6001   match(PrefetchAllocation mem);
 6002   ins_cost(125);
 6003
 6004   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 6005   ins_encode %{
 6006     __ prefetchw($mem$$Address);
 6007   %}
 6008   ins_pipe(ialu_mem);
 6009 %}
 6010 
 6011 instruct prefetchAllocNTA( memory mem ) %{
        // PrefetchAllocation variant chosen when AllocatePrefetchInstr == 0:
        // emits prefetchnta on the operand address.
 6012   predicate(AllocatePrefetchInstr==0);
 6013   match(PrefetchAllocation mem);
 6014   ins_cost(125);
 6015
 6016   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 6017   ins_encode %{
 6018     __ prefetchnta($mem$$Address);
 6019   %}
 6020   ins_pipe(ialu_mem);
 6021 %}
 6022 
 6023 instruct prefetchAllocT0( memory mem ) %{
        // PrefetchAllocation variant chosen when AllocatePrefetchInstr == 1:
        // emits prefetcht0 on the operand address.
 6024   predicate(AllocatePrefetchInstr==1);
 6025   match(PrefetchAllocation mem);
 6026   ins_cost(125);
 6027
 6028   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 6029   ins_encode %{
 6030     __ prefetcht0($mem$$Address);
 6031   %}
 6032   ins_pipe(ialu_mem);
 6033 %}
 6034 
 6035 instruct prefetchAllocT2( memory mem ) %{
        // PrefetchAllocation variant chosen when AllocatePrefetchInstr == 2:
        // emits prefetcht2 on the operand address.
 6036   predicate(AllocatePrefetchInstr==2);
 6037   match(PrefetchAllocation mem);
 6038   ins_cost(125);
 6039
 6040   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 6041   ins_encode %{
 6042     __ prefetcht2($mem$$Address);
 6043   %}
 6044   ins_pipe(ialu_mem);
 6045 %}
 6046 
 6047 //----------Store Instructions-------------------------------------------------
 6048 
 6049 // Store Byte
 6050 instruct storeB(memory mem, rRegI src)
 6051 %{
        // Register-to-memory byte store: StoreB of an int register becomes movb.
 6052   match(Set mem (StoreB mem src));
 6053
 6054   ins_cost(125); // XXX
 6055   format %{ "movb    $mem, $src\t# byte" %}
 6056   ins_encode %{
 6057     __ movb($mem$$Address, $src$$Register);
 6058   %}
 6059   ins_pipe(ialu_mem_reg);
 6060 %}
 6061 
 6062 // Store Char/Short
 6063 instruct storeC(memory mem, rRegI src)
 6064 %{
        // Register-to-memory 16-bit store: StoreC of an int register becomes movw.
 6065   match(Set mem (StoreC mem src));
 6066
 6067   ins_cost(125); // XXX
 6068   format %{ "movw    $mem, $src\t# char/short" %}
 6069   ins_encode %{
 6070     __ movw($mem$$Address, $src$$Register);
 6071   %}
 6072   ins_pipe(ialu_mem_reg);
 6073 %}
 6074 
 6075 // Store Integer
 6076 instruct storeI(memory mem, rRegI src)
 6077 %{
        // Register-to-memory 32-bit store: StoreI of an int register becomes movl.
 6078   match(Set mem (StoreI mem src));
 6079
 6080   ins_cost(125); // XXX
 6081   format %{ "movl    $mem, $src\t# int" %}
 6082   ins_encode %{
 6083     __ movl($mem$$Address, $src$$Register);
 6084   %}
 6085   ins_pipe(ialu_mem_reg);
 6086 %}
 6087 
 6088 // Store Long
 6089 instruct storeL(memory mem, rRegL src)
 6090 %{
        // Register-to-memory 64-bit store: StoreL of a long register becomes movq.
 6091   match(Set mem (StoreL mem src));
 6092
 6093   ins_cost(125); // XXX
 6094   format %{ "movq    $mem, $src\t# long" %}
 6095   ins_encode %{
 6096     __ movq($mem$$Address, $src$$Register);
 6097   %}
 6098   ins_pipe(ialu_mem_reg); // XXX
 6099 %}
 6100 
 6101 // Store Pointer
 6102 instruct storeP(memory mem, any_RegP src)
 6103 %{
        // Plain 64-bit pointer store (movq). Restricted by the predicate to
        // StoreP nodes whose barrier_data() is 0; stores that carry barrier data
        // are presumably matched by GC-specific rules elsewhere -- verify.
 6104   predicate(n->as_Store()->barrier_data() == 0);
 6105   match(Set mem (StoreP mem src));
 6106
 6107   ins_cost(125); // XXX
 6108   format %{ "movq    $mem, $src\t# ptr" %}
 6109   ins_encode %{
 6110     __ movq($mem$$Address, $src$$Register);
 6111   %}
 6112   ins_pipe(ialu_mem_reg);
 6113 %}
 6114 
 6115 instruct storeImmP0(memory mem, immP0 zero)
 6116 %{
        // Null-pointer store that writes R12 instead of an immediate. The
        // predicate requires compressed oops with a null heap base (the mode in
        // which, per the format text, R12_heapbase == 0) and barrier_data() == 0.
 6117   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 6118   match(Set mem (StoreP mem zero));
 6119
 6120   ins_cost(125); // XXX
 6121   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 6122   ins_encode %{
 6123     __ movq($mem$$Address, r12);
 6124   %}
 6125   ins_pipe(ialu_mem_reg);
 6126 %}
 6127 
 6128 // Store null pointer, mark word, or other simple pointer constant.
 6129 instruct storeImmP(memory mem, immP31 src)
 6130 %{
        // Pointer-constant store for values accepted by immP31: movq with an
        // immediate operand. Only applies when the store's barrier_data() is 0.
 6131   predicate(n->as_Store()->barrier_data() == 0);
 6132   match(Set mem (StoreP mem src));
 6133
 6134   ins_cost(150); // XXX
 6135   format %{ "movq    $mem, $src\t# ptr" %}
 6136   ins_encode %{
 6137     __ movq($mem$$Address, $src$$constant);
 6138   %}
 6139   ins_pipe(ialu_mem_imm);
 6140 %}
 6141 
 6142 // Store Compressed Pointer
 6143 instruct storeN(memory mem, rRegN src)
 6144 %{
        // Compressed-oop store from a narrow register: StoreN becomes a 32-bit movl.
 6145   match(Set mem (StoreN mem src));
 6146
 6147   ins_cost(125); // XXX
 6148   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6149   ins_encode %{
 6150     __ movl($mem$$Address, $src$$Register);
 6151   %}
 6152   ins_pipe(ialu_mem_reg);
 6153 %}
 6154 
 6155 instruct storeNKlass(memory mem, rRegN src)
 6156 %{
        // Compressed-klass store from a narrow register: StoreNKlass becomes a
        // 32-bit movl.
 6157   match(Set mem (StoreNKlass mem src));
 6158
 6159   ins_cost(125); // XXX
 6160   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6161   ins_encode %{
 6162     __ movl($mem$$Address, $src$$Register);
 6163   %}
 6164   ins_pipe(ialu_mem_reg);
 6165 %}
 6166 
 6167 instruct storeImmN0(memory mem, immN0 zero)
 6168 %{
        // Compressed null store that writes the low 32 bits of R12. Only eligible
        // when the compressed-oop base is null, the mode in which (per the format
        // text) R12_heapbase == 0.
 6169   predicate(CompressedOops::base() == nullptr);
 6170   match(Set mem (StoreN mem zero));
 6171
 6172   ins_cost(125); // XXX
 6173   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 6174   ins_encode %{
 6175     __ movl($mem$$Address, r12);
 6176   %}
 6177   ins_pipe(ialu_mem_reg);
 6178 %}
 6179 
 6180 instruct storeImmN(memory mem, immN src)
 6181 %{
        // Compressed-oop constant store. A non-null constant goes through
        // set_narrow_oop on the memory operand; a null constant is written as a
        // plain 32-bit zero immediate.
 6182   match(Set mem (StoreN mem src));
 6183
 6184   ins_cost(150); // XXX
 6185   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6186   ins_encode %{
 6187     address oop_addr = (address)$src$$constant;
 6188     if (oop_addr != nullptr) {
 6189       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 6190     } else {
 6191       __ movl($mem$$Address, 0);
 6192     }
 6193   %}
 6194   ins_pipe(ialu_mem_imm);
 6195 %}
 6196 
 6197 instruct storeImmNKlass(memory mem, immNKlass src)
 6198 %{
        // Compressed-klass constant store: delegates to set_narrow_klass on the
        // memory operand (no null check is performed here).
 6199   match(Set mem (StoreNKlass mem src));
 6200
 6201   ins_cost(150); // XXX
 6202   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6203   ins_encode %{
 6204     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 6205   %}
 6206   ins_pipe(ialu_mem_imm);
 6207 %}
 6208 
 6209 // Store Integer Immediate
 6210 instruct storeImmI0(memory mem, immI_0 zero)
 6211 %{
        // Int zero store that writes R12 (32-bit) rather than an immediate.
        // Eligible only with compressed oops and a null heap base, the mode in
        // which (per the format text) R12_heapbase == 0.
 6212   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6213   match(Set mem (StoreI mem zero));
 6214
 6215   ins_cost(125); // XXX
 6216   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 6217   ins_encode %{
 6218     __ movl($mem$$Address, r12);
 6219   %}
 6220   ins_pipe(ialu_mem_reg);
 6221 %}
 6222 
 6223 instruct storeImmI(memory mem, immI src)
 6224 %{
        // Int constant store: movl with an immediate operand (cost 150).
 6225   match(Set mem (StoreI mem src));
 6226
 6227   ins_cost(150);
 6228   format %{ "movl    $mem, $src\t# int" %}
 6229   ins_encode %{
 6230     __ movl($mem$$Address, $src$$constant);
 6231   %}
 6232   ins_pipe(ialu_mem_imm);
 6233 %}
 6234 
 6235 // Store Long Immediate
 6236 instruct storeImmL0(memory mem, immL0 zero)
 6237 %{
        // Long zero store that writes R12 (64-bit) rather than an immediate.
        // Eligible only with compressed oops and a null heap base, the mode in
        // which (per the format text) R12_heapbase == 0.
 6238   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6239   match(Set mem (StoreL mem zero));
 6240
 6241   ins_cost(125); // XXX
 6242   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 6243   ins_encode %{
 6244     __ movq($mem$$Address, r12);
 6245   %}
 6246   ins_pipe(ialu_mem_reg);
 6247 %}
 6248 
 6249 instruct storeImmL(memory mem, immL32 src)
 6250 %{
        // Long constant store for values accepted by immL32: movq with an
        // immediate operand (cost 150).
 6251   match(Set mem (StoreL mem src));
 6252
 6253   ins_cost(150);
 6254   format %{ "movq    $mem, $src\t# long" %}
 6255   ins_encode %{
 6256     __ movq($mem$$Address, $src$$constant);
 6257   %}
 6258   ins_pipe(ialu_mem_imm);
 6259 %}
 6260 
 6261 // Store Short/Char Immediate
 6262 instruct storeImmC0(memory mem, immI_0 zero)
 6263 %{
        // 16-bit zero store that writes the low word of R12 rather than an
        // immediate. Eligible only with compressed oops and a null heap base, the
        // mode in which (per the format text) R12_heapbase == 0.
 6264   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6265   match(Set mem (StoreC mem zero));
 6266
 6267   ins_cost(125); // XXX
 6268   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 6269   ins_encode %{
 6270     __ movw($mem$$Address, r12);
 6271   %}
 6272   ins_pipe(ialu_mem_reg);
 6273 %}
 6274 
 6275 instruct storeImmI16(memory mem, immI16 src)
 6276 %{
        // 16-bit constant store (movw with immediate). Gated by the
        // UseStoreImmI16 flag via the predicate.
 6277   predicate(UseStoreImmI16);
 6278   match(Set mem (StoreC mem src));
 6279
 6280   ins_cost(150);
 6281   format %{ "movw    $mem, $src\t# short/char" %}
 6282   ins_encode %{
 6283     __ movw($mem$$Address, $src$$constant);
 6284   %}
 6285   ins_pipe(ialu_mem_imm);
 6286 %}
 6287 
 6288 // Store Byte Immediate
 6289 instruct storeImmB0(memory mem, immI_0 zero)
 6290 %{
        // Byte zero store that writes the low byte of R12 rather than an
        // immediate. Eligible only with compressed oops and a null heap base, the
        // mode in which (per the format text) R12_heapbase == 0.
        // The format annotation reads "byte": this rule matches StoreB and emits
        // movb (it previously carried the "short/char" text of the StoreC rule).
 6291   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6292   match(Set mem (StoreB mem zero));
 6293
 6294   ins_cost(125); // XXX
 6295   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 6296   ins_encode %{
 6297     __ movb($mem$$Address, r12);
 6298   %}
 6299   ins_pipe(ialu_mem_reg);
 6300 %}
 6301 
 6302 instruct storeImmB(memory mem, immI8 src)
 6303 %{
        // Byte constant store for values accepted by immI8: movb with an
        // immediate operand (cost 150).
 6304   match(Set mem (StoreB mem src));
 6305
 6306   ins_cost(150); // XXX
 6307   format %{ "movb    $mem, $src\t# byte" %}
 6308   ins_encode %{
 6309     __ movb($mem$$Address, $src$$constant);
 6310   %}
 6311   ins_pipe(ialu_mem_imm);
 6312 %}
 6313 
 6314 // Store CMS card-mark Immediate
 6315 instruct storeImmCM0_reg(memory mem, immI_0 zero)
 6316 %{
        // Card-mark (StoreCM) of zero that writes the low byte of R12 rather than
        // an immediate. Eligible only with compressed oops and a null heap base,
        // the mode in which (per the format text) R12_heapbase == 0.
 6317   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6318   match(Set mem (StoreCM mem zero));
 6319
 6320   ins_cost(125); // XXX
 6321   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
 6322   ins_encode %{
 6323     __ movb($mem$$Address, r12);
 6324   %}
 6325   ins_pipe(ialu_mem_reg);
 6326 %}
 6327 
 6328 instruct storeImmCM0(memory mem, immI_0 src)
 6329 %{
        // Card-mark (StoreCM) of zero using an immediate movb; has no predicate,
        // so it is available in every configuration (cost 150).
 6330   match(Set mem (StoreCM mem src));
 6331
 6332   ins_cost(150); // XXX
 6333   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
 6334   ins_encode %{
 6335     __ movb($mem$$Address, $src$$constant);
 6336   %}
 6337   ins_pipe(ialu_mem_imm);
 6338 %}
 6339 
 6340 // Store Float
 6341 instruct storeF(memory mem, regF src)
 6342 %{
        // Float store from an XMM register: StoreF becomes movflt to memory.
 6343   match(Set mem (StoreF mem src));
 6344
 6345   ins_cost(95); // XXX
 6346   format %{ "movss   $mem, $src\t# float" %}
 6347   ins_encode %{
 6348     __ movflt($mem$$Address, $src$$XMMRegister);
 6349   %}
 6350   ins_pipe(pipe_slow); // XXX
 6351 %}
 6352 
 6353 // Store float 0.0 from R12 (it is faster than store from XMM register)
 6354 instruct storeF0(memory mem, immF0 zero)
 6355 %{
        // Writes the low 32 bits of R12 as the float 0.0 bit pattern. Eligible
        // only with compressed oops and a null heap base, the mode in which (per
        // the format text) R12_heapbase == 0.
 6356   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6357   match(Set mem (StoreF mem zero));
 6358
 6359   ins_cost(25); // XXX
 6360   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 6361   ins_encode %{
 6362     __ movl($mem$$Address, r12);
 6363   %}
 6364   ins_pipe(ialu_mem_reg);
 6365 %}
 6366 
 6367 instruct storeF_imm(memory mem, immF src)
 6368 %{
        // Float constant store done on the integer side: the constant is
        // converted with jint_cast and written with a 32-bit immediate movl.
 6369   match(Set mem (StoreF mem src));
 6370
 6371   ins_cost(50);
 6372   format %{ "movl    $mem, $src\t# float" %}
 6373   ins_encode %{
 6374     __ movl($mem$$Address, jint_cast($src$$constant));
 6375   %}
 6376   ins_pipe(ialu_mem_imm);
 6377 %}
 6378 
 6379 // Store Double
 6380 instruct storeD(memory mem, regD src)
 6381 %{
        // Double store from an XMM register: StoreD becomes movdbl to memory.
 6382   match(Set mem (StoreD mem src));
 6383
 6384   ins_cost(95); // XXX
 6385   format %{ "movsd   $mem, $src\t# double" %}
 6386   ins_encode %{
 6387     __ movdbl($mem$$Address, $src$$XMMRegister);
 6388   %}
 6389   ins_pipe(pipe_slow); // XXX
 6390 %}
 6391 
 6392 // Store immediate double 0.0 (it is faster than store from XMM register)
 6393 instruct storeD0_imm(memory mem, immD0 src)
 6394 %{
        // Double 0.0 store via an immediate movq. Its predicate is the exact
        // negation of the one on the R12-based storeD0 rule, so the two rules
        // never apply in the same configuration.
 6395   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 6396   match(Set mem (StoreD mem src));
 6397
 6398   ins_cost(50);
 6399   format %{ "movq    $mem, $src\t# double 0." %}
 6400   ins_encode %{
 6401     __ movq($mem$$Address, $src$$constant);
 6402   %}
 6403   ins_pipe(ialu_mem_imm);
 6404 %}
 6405 
 6406 instruct storeD0(memory mem, immD0 zero)
 6407 %{
        // Double 0.0 store that writes R12 (64-bit). Eligible only with
        // compressed oops and a null heap base, the mode in which (per the format
        // text) R12_heapbase == 0.
 6408   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 6409   match(Set mem (StoreD mem zero));
 6410
 6411   ins_cost(25); // XXX
 6412   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 6413   ins_encode %{
 6414     __ movq($mem$$Address, r12);
 6415   %}
 6416   ins_pipe(ialu_mem_reg);
 6417 %}
 6418 
 6419 instruct storeSSI(stackSlotI dst, rRegI src)
 6420 %{
        // Int spill to a stack slot. Encoded from opcode 0x89 plus the
        // REX_reg_mem / OpcP / reg_mem encoding classes (declared outside this
        // chunk); note the (src, dst) argument order used for the store form.
 6421   match(Set dst src);
 6422
 6423   ins_cost(100);
 6424   format %{ "movl    $dst, $src\t# int stk" %}
 6425   opcode(0x89);
 6426   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
 6427   ins_pipe( ialu_mem_reg );
 6428 %}
 6429 
 6430 instruct storeSSL(stackSlotL dst, rRegL src)
 6431 %{
        // Long spill to a stack slot: opcode 0x89 with the REX_reg_mem_wide
        // encoding class for the 64-bit operand size.
 6432   match(Set dst src);
 6433
 6434   ins_cost(100);
 6435   format %{ "movq    $dst, $src\t# long stk" %}
 6436   opcode(0x89);
 6437   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
 6438   ins_pipe(ialu_mem_reg);
 6439 %}
 6440 
 6441 instruct storeSSP(stackSlotP dst, rRegP src)
 6442 %{
        // Pointer spill to a stack slot: opcode 0x89 with the REX_reg_mem_wide
        // encoding class (64-bit move).
 6443   match(Set dst src);
 6444
 6445   ins_cost(100);
 6446   format %{ "movq    $dst, $src\t# ptr stk" %}
 6447   opcode(0x89);
 6448   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
 6449   ins_pipe(ialu_mem_reg);
 6450 %}
 6451 
 6452 instruct storeSSF(stackSlotF dst, regF src)
 6453 %{
        // Float spill to a stack slot: movflt from the XMM source to
        // [rsp + slot displacement].
 6454   match(Set dst src);
 6455
 6456   ins_cost(95); // XXX
 6457   format %{ "movss   $dst, $src\t# float stk" %}
 6458   ins_encode %{
 6459     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 6460   %}
 6461   ins_pipe(pipe_slow); // XXX
 6462 %}
 6463 
 6464 instruct storeSSD(stackSlotD dst, regD src)
 6465 %{
        // Double spill to a stack slot: movdbl from the XMM source to
        // [rsp + slot displacement].
 6466   match(Set dst src);
 6467
 6468   ins_cost(95); // XXX
 6469   format %{ "movsd   $dst, $src\t# double stk" %}
 6470   ins_encode %{
 6471     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 6472   %}
 6473   ins_pipe(pipe_slow); // XXX
 6474 %}
 6475 
 6476 instruct cacheWB(indirect addr)
 6477 %{
        // Cache-line writeback for a CacheWB node; available only when the CPU
        // reports data-cache-line flush support. The encoding asserts that the
        // operand has no index register and a zero displacement, then calls
        // cache_wb on [base + 0].
 6478   predicate(VM_Version::supports_data_cache_line_flush());
 6479   match(CacheWB addr);
 6480
 6481   ins_cost(100);
 6482   format %{"cache wb $addr" %}
 6483   ins_encode %{
 6484     assert($addr->index_position() < 0, "should be");
 6485     assert($addr$$disp == 0, "should be");
 6486     __ cache_wb(Address($addr$$base$$Register, 0));
 6487   %}
 6488   ins_pipe(pipe_slow); // XXX
 6489 %}
 6490 
 6491 instruct cacheWBPreSync()
 6492 %{
        // Synchronization emitted for a CacheWBPreSync node: calls
        // cache_wbsync(true). Same CPU-feature predicate as the writeback itself.
 6493   predicate(VM_Version::supports_data_cache_line_flush());
 6494   match(CacheWBPreSync);
 6495
 6496   ins_cost(100);
 6497   format %{"cache wb presync" %}
 6498   ins_encode %{
 6499     __ cache_wbsync(true);
 6500   %}
 6501   ins_pipe(pipe_slow); // XXX
 6502 %}
 6503 
 6504 instruct cacheWBPostSync()
 6505 %{
        // Synchronization emitted for a CacheWBPostSync node: calls
        // cache_wbsync(false). Same CPU-feature predicate as the writeback itself.
 6506   predicate(VM_Version::supports_data_cache_line_flush());
 6507   match(CacheWBPostSync);
 6508
 6509   ins_cost(100);
 6510   format %{"cache wb postsync" %}
 6511   ins_encode %{
 6512     __ cache_wbsync(false);
 6513   %}
 6514   ins_pipe(pipe_slow); // XXX
 6515 %}
 6516 
 6517 //----------BSWAP Instructions-------------------------------------------------
 6518 instruct bytes_reverse_int(rRegI dst) %{
        // In-place byte swap of an int: $dst is both input and output of
        // ReverseBytesI and is rewritten by a single bswapl.
 6519   match(Set dst (ReverseBytesI dst));
 6520
 6521   format %{ "bswapl  $dst" %}
 6522   ins_encode %{
 6523     __ bswapl($dst$$Register);
 6524   %}
 6525   ins_pipe( ialu_reg );
 6526 %}
 6527 
 6528 instruct bytes_reverse_long(rRegL dst) %{
        // In-place byte swap of a long: $dst is both input and output of
        // ReverseBytesL and is rewritten by a single bswapq.
 6529   match(Set dst (ReverseBytesL dst));
 6530
 6531   format %{ "bswapq  $dst" %}
 6532   ins_encode %{
 6533     __ bswapq($dst$$Register);
 6534   %}
 6535   ins_pipe( ialu_reg);
 6536 %}
 6537 
 6538 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
        // In-place ReverseBytesUS: bswapl moves the swapped low 16 bits into the
        // upper half, then a logical right shift by 16 brings them back down with
        // zero fill. The shift rewrites flags, so cr is KILLed.
 6539   match(Set dst (ReverseBytesUS dst));
 6540   effect(KILL cr);
 6541
 6542   format %{ "bswapl  $dst\n\t"
 6543             "shrl    $dst,16\n\t" %}
 6544   ins_encode %{
 6545     __ bswapl($dst$$Register);
 6546     __ shrl($dst$$Register, 16);
 6547   %}
 6548   ins_pipe( ialu_reg );
 6549 %}
 6550 
 6551 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
        // In-place ReverseBytesS: bswapl moves the swapped low 16 bits into the
        // upper half, then an arithmetic right shift by 16 brings them back down
        // with sign fill. The shift rewrites flags, so cr is KILLed.
        // The format text prints "sarl" to match the instruction emitted below
        // (it previously printed the size-less mnemonic "sar").
 6552   match(Set dst (ReverseBytesS dst));
 6553   effect(KILL cr);
 6554
 6555   format %{ "bswapl  $dst\n\t"
 6556             "sarl    $dst,16\n\t" %}
 6557   ins_encode %{
 6558     __ bswapl($dst$$Register);
 6559     __ sarl($dst$$Register, 16);
 6560   %}
 6561   ins_pipe( ialu_reg );
 6562 %}
 6563 
 6564 //---------- Zeros Count Instructions ------------------------------------------
 6565 
 6566 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // CountLeadingZerosI as a single lzcntl, used when
        // UseCountLeadingZerosInstruction is set. Flags are declared killed.
 6567   predicate(UseCountLeadingZerosInstruction);
 6568   match(Set dst (CountLeadingZerosI src));
 6569   effect(KILL cr);
 6570
 6571   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 6572   ins_encode %{
 6573     __ lzcntl($dst$$Register, $src$$Register);
 6574   %}
 6575   ins_pipe(ialu_reg);
 6576 %}
 6577 
 6578 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form: folds the LoadI feeding CountLeadingZerosI into
        // lzcntl with an address operand (cost 175).
 6579   predicate(UseCountLeadingZerosInstruction);
 6580   match(Set dst (CountLeadingZerosI (LoadI src)));
 6581   effect(KILL cr);
 6582   ins_cost(175);
 6583   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 6584   ins_encode %{
 6585     __ lzcntl($dst$$Register, $src$$Address);
 6586   %}
 6587   ins_pipe(ialu_reg_mem);
 6588 %}
 6589 
 6590 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Fallback for CountLeadingZerosI when the lzcnt instruction is not
        // enabled: bsrl yields the index of the highest set bit, and the result
        // is computed as (BitsPerInt - 1) - index. A zero input is handled by
        // substituting index -1, which gives 32.
 6591   predicate(!UseCountLeadingZerosInstruction);
 6592   match(Set dst (CountLeadingZerosI src));
 6593   effect(KILL cr);
 6594
 6595   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 6596             "jnz     skip\n\t"
 6597             "movl    $dst, -1\n"
 6598       "skip:\n\t"
 6599             "negl    $dst\n\t"
 6600             "addl    $dst, 31" %}
 6601   ins_encode %{
 6602     Register result = $dst$$Register;
 6603     Register value  = $src$$Register;
 6604     Label skip;
 6605     __ bsrl(result, value);
          // bsr sets ZF when the source is zero; only then force the index to -1.
 6606     __ jccb(Assembler::notZero, skip);
 6607     __ movl(result, -1);
 6608     __ bind(skip);
          // result = (BitsPerInt - 1) - index
 6609     __ negl(result);
 6610     __ addl(result, BitsPerInt - 1);
 6611   %}
 6612   ins_pipe(ialu_reg);
 6613 %}
 6614 
 6615 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // CountLeadingZerosL as a single lzcntq (long source, int result), used
        // when UseCountLeadingZerosInstruction is set. Flags are declared killed.
 6616   predicate(UseCountLeadingZerosInstruction);
 6617   match(Set dst (CountLeadingZerosL src));
 6618   effect(KILL cr);
 6619
 6620   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 6621   ins_encode %{
 6622     __ lzcntq($dst$$Register, $src$$Register);
 6623   %}
 6624   ins_pipe(ialu_reg);
 6625 %}
 6626 
 6627 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form: folds the LoadL feeding CountLeadingZerosL into
        // lzcntq with an address operand (cost 175).
 6628   predicate(UseCountLeadingZerosInstruction);
 6629   match(Set dst (CountLeadingZerosL (LoadL src)));
 6630   effect(KILL cr);
 6631   ins_cost(175);
 6632   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 6633   ins_encode %{
 6634     __ lzcntq($dst$$Register, $src$$Address);
 6635   %}
 6636   ins_pipe(ialu_reg_mem);
 6637 %}
 6638 
 6639 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Fallback for CountLeadingZerosL when the lzcnt instruction is not
        // enabled: bsrq yields the index of the highest set bit, and the result
        // is computed as (BitsPerLong - 1) - index. A zero input is handled by
        // substituting index -1, which gives 64.
 6640   predicate(!UseCountLeadingZerosInstruction);
 6641   match(Set dst (CountLeadingZerosL src));
 6642   effect(KILL cr);
 6643
 6644   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 6645             "jnz     skip\n\t"
 6646             "movl    $dst, -1\n"
 6647       "skip:\n\t"
 6648             "negl    $dst\n\t"
 6649             "addl    $dst, 63" %}
 6650   ins_encode %{
 6651     Register result = $dst$$Register;
 6652     Register value  = $src$$Register;
 6653     Label skip;
 6654     __ bsrq(result, value);
          // bsr sets ZF when the source is zero; only then force the index to -1.
 6655     __ jccb(Assembler::notZero, skip);
 6656     __ movl(result, -1);
 6657     __ bind(skip);
          // result = (BitsPerLong - 1) - index
 6658     __ negl(result);
 6659     __ addl(result, BitsPerLong - 1);
 6660   %}
 6661   ins_pipe(ialu_reg);
 6662 %}
 6663 
 6664 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // CountTrailingZerosI as a single tzcntl, used when
        // UseCountTrailingZerosInstruction is set. Flags are declared killed.
 6665   predicate(UseCountTrailingZerosInstruction);
 6666   match(Set dst (CountTrailingZerosI src));
 6667   effect(KILL cr);
 6668
 6669   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 6670   ins_encode %{
 6671     __ tzcntl($dst$$Register, $src$$Register);
 6672   %}
 6673   ins_pipe(ialu_reg);
 6674 %}
 6675 
 6676 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form: folds the LoadI feeding CountTrailingZerosI into
        // tzcntl with an address operand (cost 175).
 6677   predicate(UseCountTrailingZerosInstruction);
 6678   match(Set dst (CountTrailingZerosI (LoadI src)));
 6679   effect(KILL cr);
 6680   ins_cost(175);
 6681   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 6682   ins_encode %{
 6683     __ tzcntl($dst$$Register, $src$$Address);
 6684   %}
 6685   ins_pipe(ialu_reg_mem);
 6686 %}
 6687 
 6688 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Fallback for CountTrailingZerosI when the tzcnt instruction is not
        // enabled: bsfl yields the index of the lowest set bit, which is the
        // answer for non-zero input; a zero input produces BitsPerInt.
 6689   predicate(!UseCountTrailingZerosInstruction);
 6690   match(Set dst (CountTrailingZerosI src));
 6691   effect(KILL cr);
 6692
 6693   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 6694             "jnz     done\n\t"
 6695             "movl    $dst, 32\n"
 6696       "done:" %}
 6697   ins_encode %{
 6698     Register result = $dst$$Register;
 6699     Label done;
 6700     __ bsfl(result, $src$$Register);
          // bsf sets ZF when the source is zero; only then load the bit width.
 6701     __ jccb(Assembler::notZero, done);
 6702     __ movl(result, BitsPerInt);
 6703     __ bind(done);
 6704   %}
 6705   ins_pipe(ialu_reg);
 6706 %}
 6707 
 6708 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // CountTrailingZerosL as a single tzcntq (long source, int result), used
        // when UseCountTrailingZerosInstruction is set. Flags are declared killed.
 6709   predicate(UseCountTrailingZerosInstruction);
 6710   match(Set dst (CountTrailingZerosL src));
 6711   effect(KILL cr);
 6712
 6713   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 6714   ins_encode %{
 6715     __ tzcntq($dst$$Register, $src$$Register);
 6716   %}
 6717   ins_pipe(ialu_reg);
 6718 %}
 6719 
 6720 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form: folds the LoadL feeding CountTrailingZerosL into
        // tzcntq with an address operand (cost 175).
 6721   predicate(UseCountTrailingZerosInstruction);
 6722   match(Set dst (CountTrailingZerosL (LoadL src)));
 6723   effect(KILL cr);
 6724   ins_cost(175);
 6725   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 6726   ins_encode %{
 6727     __ tzcntq($dst$$Register, $src$$Address);
 6728   %}
 6729   ins_pipe(ialu_reg_mem);
 6730 %}
 6731 
 6732 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Fallback for CountTrailingZerosL when the tzcnt instruction is not
        // enabled: bsfq yields the index of the lowest set bit, which is the
        // answer for non-zero input; a zero input produces BitsPerLong.
 6733   predicate(!UseCountTrailingZerosInstruction);
 6734   match(Set dst (CountTrailingZerosL src));
 6735   effect(KILL cr);
 6736
 6737   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 6738             "jnz     done\n\t"
 6739             "movl    $dst, 64\n"
 6740       "done:" %}
 6741   ins_encode %{
 6742     Register result = $dst$$Register;
 6743     Label done;
 6744     __ bsfq(result, $src$$Register);
          // bsf sets ZF when the source is zero; only then load the bit width.
 6745     __ jccb(Assembler::notZero, done);
 6746     __ movl(result, BitsPerLong);
 6747     __ bind(done);
 6748   %}
 6749   ins_pipe(ialu_reg);
 6750 %}
 6751 
 6752 //--------------- Reverse Operation Instructions ----------------
 6753 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 6754   predicate(!VM_Version::supports_gfni());
 6755   match(Set dst (ReverseI src));
 6756   effect(TEMP dst, TEMP rtmp, KILL cr);
 6757   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 6758   ins_encode %{
 6759     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 6760   %}
 6761   ins_pipe( ialu_reg );
 6762 %}
 6763 
 6764 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 6765   predicate(VM_Version::supports_gfni());
 6766   match(Set dst (ReverseI src));
 6767   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6768   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6769   ins_encode %{
 6770     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 6771   %}
 6772   ins_pipe( ialu_reg );
 6773 %}
 6774 
 6775 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 6776   predicate(!VM_Version::supports_gfni());
 6777   match(Set dst (ReverseL src));
 6778   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 6779   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 6780   ins_encode %{
 6781     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 6782   %}
 6783   ins_pipe( ialu_reg );
 6784 %}
 6785 
 6786 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 6787   predicate(VM_Version::supports_gfni());
 6788   match(Set dst (ReverseL src));
 6789   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6790   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6791   ins_encode %{
 6792     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 6793   %}
 6794   ins_pipe( ialu_reg );
 6795 %}
 6796 
 6797 //---------- Population Count Instructions -------------------------------------
 6798 
 6799 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 6800   predicate(UsePopCountInstruction);
 6801   match(Set dst (PopCountI src));
 6802   effect(KILL cr);
 6803 
 6804   format %{ "popcnt  $dst, $src" %}
 6805   ins_encode %{
 6806     __ popcntl($dst$$Register, $src$$Register);
 6807   %}
 6808   ins_pipe(ialu_reg);
 6809 %}
 6810 
 6811 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 6812   predicate(UsePopCountInstruction);
 6813   match(Set dst (PopCountI (LoadI mem)));
 6814   effect(KILL cr);
 6815 
 6816   format %{ "popcnt  $dst, $mem" %}
 6817   ins_encode %{
 6818     __ popcntl($dst$$Register, $mem$$Address);
 6819   %}
 6820   ins_pipe(ialu_reg);
 6821 %}
 6822 
 6823 // Note: Long.bitCount(long) returns an int.
 6824 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 6825   predicate(UsePopCountInstruction);
 6826   match(Set dst (PopCountL src));
 6827   effect(KILL cr);
 6828 
 6829   format %{ "popcnt  $dst, $src" %}
 6830   ins_encode %{
 6831     __ popcntq($dst$$Register, $src$$Register);
 6832   %}
 6833   ins_pipe(ialu_reg);
 6834 %}
 6835 
 6836 // Note: Long.bitCount(long) returns an int.
 6837 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 6838   predicate(UsePopCountInstruction);
 6839   match(Set dst (PopCountL (LoadL mem)));
 6840   effect(KILL cr);
 6841 
 6842   format %{ "popcnt  $dst, $mem" %}
 6843   ins_encode %{
 6844     __ popcntq($dst$$Register, $mem$$Address);
 6845   %}
 6846   ins_pipe(ialu_reg);
 6847 %}
 6848 
 6849 
 6850 //----------MemBar Instructions-----------------------------------------------
 6851 // Memory barrier flavors
 6852 
// Matches both MemBarAcquire and LoadFence. Emits no code: the encoding is
// empty, the declared size is 0 and the cost is 0.
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6864 
// Matches MemBarAcquireLock. Emits no code (empty encoding, size 0, cost 0);
// per the format text, a prior CMPXCHG in FastLock makes a barrier unnecessary.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6875 
// Matches both MemBarRelease and StoreFence. Emits no code: the encoding is
// empty, the declared size is 0 and the cost is 0.
instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6887 
// Matches MemBarReleaseLock. Emits no code (empty encoding, size 0, cost 0);
// per the format text, the FastUnlock that follows makes a barrier unnecessary.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6898 
// Matches MemBarVolatile and emits a real barrier through
// membar(Assembler::StoreLoad). The format template prints it as a locked
// add of 0 to the top of the stack. The flags are killed, and the cost (400)
// is high relative to the zero-cost empty membar rules.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
 6913 
// Matches MemBarVolatile only when Matcher::post_store_load_barrier(n) holds
// for the node. In that case nothing is emitted (empty encoding, size 0), and
// the zero cost makes this rule cheaper than the barrier-emitting one.
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6925 
// Matches both MemBarStoreStore and StoreStoreFence. Emits no code: the
// encoding is empty, the declared size is 0 and the cost is 0.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 6936 
 6937 //----------Move Instructions--------------------------------------------------
 6938 
 6939 instruct castX2P(rRegP dst, rRegL src)
 6940 %{
 6941   match(Set dst (CastX2P src));
 6942 
 6943   format %{ "movq    $dst, $src\t# long->ptr" %}
 6944   ins_encode %{
 6945     if ($dst$$reg != $src$$reg) {
 6946       __ movptr($dst$$Register, $src$$Register);
 6947     }
 6948   %}
 6949   ins_pipe(ialu_reg_reg); // XXX
 6950 %}
 6951 
 6952 instruct castN2X(rRegL dst, rRegN src)
 6953 %{
 6954   match(Set dst (CastP2X src));
 6955 
 6956   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6957   ins_encode %{
 6958     if ($dst$$reg != $src$$reg) {
 6959       __ movptr($dst$$Register, $src$$Register);
 6960     }
 6961   %}
 6962   ins_pipe(ialu_reg_reg); // XXX
 6963 %}
 6964 
 6965 instruct castP2X(rRegL dst, rRegP src)
 6966 %{
 6967   match(Set dst (CastP2X src));
 6968 
 6969   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6970   ins_encode %{
 6971     if ($dst$$reg != $src$$reg) {
 6972       __ movptr($dst$$Register, $src$$Register);
 6973     }
 6974   %}
 6975   ins_pipe(ialu_reg_reg); // XXX
 6976 %}
 6977 
 6978 // Convert oop into int for vectors alignment masking
 6979 instruct convP2I(rRegI dst, rRegP src)
 6980 %{
 6981   match(Set dst (ConvL2I (CastP2X src)));
 6982 
 6983   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6984   ins_encode %{
 6985     __ movl($dst$$Register, $src$$Register);
 6986   %}
 6987   ins_pipe(ialu_reg_reg); // XXX
 6988 %}
 6989 
 6990 // Convert compressed oop into int for vectors alignment masking
 6991 // in case of 32bit oops (heap < 4Gb).
 6992 instruct convN2I(rRegI dst, rRegN src)
 6993 %{
 6994   predicate(CompressedOops::shift() == 0);
 6995   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6996 
 6997   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 6998   ins_encode %{
 6999     __ movl($dst$$Register, $src$$Register);
 7000   %}
 7001   ins_pipe(ialu_reg_reg); // XXX
 7002 %}
 7003 
 7004 // Convert oop pointer into compressed form
 7005 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 7006   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 7007   match(Set dst (EncodeP src));
 7008   effect(KILL cr);
 7009   format %{ "encode_heap_oop $dst,$src" %}
 7010   ins_encode %{
 7011     Register s = $src$$Register;
 7012     Register d = $dst$$Register;
 7013     if (s != d) {
 7014       __ movq(d, s);
 7015     }
 7016     __ encode_heap_oop(d);
 7017   %}
 7018   ins_pipe(ialu_reg_long);
 7019 %}
 7020 
 7021 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 7022   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 7023   match(Set dst (EncodeP src));
 7024   effect(KILL cr);
 7025   format %{ "encode_heap_oop_not_null $dst,$src" %}
 7026   ins_encode %{
 7027     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 7028   %}
 7029   ins_pipe(ialu_reg_long);
 7030 %}
 7031 
 7032 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 7033   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 7034             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 7035   match(Set dst (DecodeN src));
 7036   effect(KILL cr);
 7037   format %{ "decode_heap_oop $dst,$src" %}
 7038   ins_encode %{
 7039     Register s = $src$$Register;
 7040     Register d = $dst$$Register;
 7041     if (s != d) {
 7042       __ movq(d, s);
 7043     }
 7044     __ decode_heap_oop(d);
 7045   %}
 7046   ins_pipe(ialu_reg_long);
 7047 %}
 7048 
 7049 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 7050   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 7051             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 7052   match(Set dst (DecodeN src));
 7053   effect(KILL cr);
 7054   format %{ "decode_heap_oop_not_null $dst,$src" %}
 7055   ins_encode %{
 7056     Register s = $src$$Register;
 7057     Register d = $dst$$Register;
 7058     if (s != d) {
 7059       __ decode_heap_oop_not_null(d, s);
 7060     } else {
 7061       __ decode_heap_oop_not_null(d);
 7062     }
 7063   %}
 7064   ins_pipe(ialu_reg_long);
 7065 %}
 7066 
 7067 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 7068   match(Set dst (EncodePKlass src));
 7069   effect(TEMP dst, KILL cr);
 7070   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 7071   ins_encode %{
 7072     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7073   %}
 7074   ins_pipe(ialu_reg_long);
 7075 %}
 7076 
 7077 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 7078   match(Set dst (DecodeNKlass src));
 7079   effect(TEMP dst, KILL cr);
 7080   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 7081   ins_encode %{
 7082     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7083   %}
 7084   ins_pipe(ialu_reg_long);
 7085 %}
 7086 
//----------Jump and Conditional Move------------------------------------------
// Jump
 7089 // dummy instruction for generating temp registers
 7090 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 7091   match(Jump (LShiftL switch_val shift));
 7092   ins_cost(350);
 7093   predicate(false);
 7094   effect(TEMP dest);
 7095 
 7096   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7097             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 7098   ins_encode %{
 7099     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7100     // to do that and the compiler is using that register as one it can allocate.
 7101     // So we build it all by hand.
 7102     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 7103     // ArrayAddress dispatch(table, index);
 7104     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 7105     __ lea($dest$$Register, $constantaddress);
 7106     __ jmp(dispatch);
 7107   %}
 7108   ins_pipe(pipe_jmp);
 7109 %}
 7110 
 7111 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 7112   match(Jump (AddL (LShiftL switch_val shift) offset));
 7113   ins_cost(350);
 7114   effect(TEMP dest);
 7115 
 7116   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7117             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 7118   ins_encode %{
 7119     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7120     // to do that and the compiler is using that register as one it can allocate.
 7121     // So we build it all by hand.
 7122     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7123     // ArrayAddress dispatch(table, index);
 7124     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7125     __ lea($dest$$Register, $constantaddress);
 7126     __ jmp(dispatch);
 7127   %}
 7128   ins_pipe(pipe_jmp);
 7129 %}
 7130 
 7131 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 7132   match(Jump switch_val);
 7133   ins_cost(350);
 7134   effect(TEMP dest);
 7135 
 7136   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7137             "jmp     [$dest + $switch_val]\n\t" %}
 7138   ins_encode %{
 7139     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7140     // to do that and the compiler is using that register as one it can allocate.
 7141     // So we build it all by hand.
 7142     // Address index(noreg, switch_reg, Address::times_1);
 7143     // ArrayAddress dispatch(table, index);
 7144     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 7145     __ lea($dest$$Register, $constantaddress);
 7146     __ jmp(dispatch);
 7147   %}
 7148   ins_pipe(pipe_jmp);
 7149 %}
 7150 
 7151 // Conditional move
 7152 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7153 %{
 7154   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7155   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7156 
 7157   ins_cost(100); // XXX
 7158   format %{ "setbn$cop $dst\t# signed, int" %}
 7159   ins_encode %{
 7160     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7161     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7162   %}
 7163   ins_pipe(ialu_reg);
 7164 %}
 7165 
 7166 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 7167 %{
 7168   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7169 
 7170   ins_cost(200); // XXX
 7171   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7172   ins_encode %{
 7173     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7174   %}
 7175   ins_pipe(pipe_cmov_reg);
 7176 %}
 7177 
 7178 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7179 %{
 7180   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7181   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7182 
 7183   ins_cost(100); // XXX
 7184   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7185   ins_encode %{
 7186     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7187     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7188   %}
 7189   ins_pipe(ialu_reg);
 7190 %}
 7191 
 7192 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 7193   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7194 
 7195   ins_cost(200); // XXX
 7196   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7197   ins_encode %{
 7198     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7199   %}
 7200   ins_pipe(pipe_cmov_reg);
 7201 %}
 7202 
 7203 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7204 %{
 7205   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7206   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7207 
 7208   ins_cost(100); // XXX
 7209   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7210   ins_encode %{
 7211     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7212     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7213   %}
 7214   ins_pipe(ialu_reg);
 7215 %}
 7216 
// Register-to-register CMoveI for the cmpOpUCF/rFlagsRegUCF flavor.
// Has no encoding of its own: it expands into cmovI_regU with the same
// operands.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
 7224 
 7225 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 7226   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7227   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7228 
 7229   ins_cost(200); // XXX
 7230   format %{ "cmovpl  $dst, $src\n\t"
 7231             "cmovnel $dst, $src" %}
 7232   ins_encode %{
 7233     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7234     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7235   %}
 7236   ins_pipe(pipe_cmov_reg);
 7237 %}
 7238 
 7239 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7240 // inputs of the CMove
 7241 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 7242   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7243   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7244 
 7245   ins_cost(200); // XXX
 7246   format %{ "cmovpl  $dst, $src\n\t"
 7247             "cmovnel $dst, $src" %}
 7248   ins_encode %{
 7249     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7250     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7251   %}
 7252   ins_pipe(pipe_cmov_reg);
 7253 %}
 7254 
 7255 // Conditional move
 7256 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 7257   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7258 
 7259   ins_cost(250); // XXX
 7260   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7261   ins_encode %{
 7262     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7263   %}
 7264   ins_pipe(pipe_cmov_mem);
 7265 %}
 7266 
 7267 // Conditional move
 7268 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 7269 %{
 7270   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7271 
 7272   ins_cost(250); // XXX
 7273   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7274   ins_encode %{
 7275     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7276   %}
 7277   ins_pipe(pipe_cmov_mem);
 7278 %}
 7279 
// Memory-source CMoveI for the cmpOpUCF/rFlagsRegUCF flavor.
// Has no encoding of its own: it expands into cmovI_memU with the same
// operands.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
 7287 
 7288 // Conditional move
 7289 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 7290 %{
 7291   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7292 
 7293   ins_cost(200); // XXX
 7294   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 7295   ins_encode %{
 7296     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7297   %}
 7298   ins_pipe(pipe_cmov_reg);
 7299 %}
 7300 
 7301 // Conditional move
 7302 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 7303 %{
 7304   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7305 
 7306   ins_cost(200); // XXX
 7307   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 7308   ins_encode %{
 7309     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7310   %}
 7311   ins_pipe(pipe_cmov_reg);
 7312 %}
 7313 
// Register-to-register CMoveN for the cmpOpUCF/rFlagsRegUCF flavor.
// Has no encoding of its own: it expands into cmovN_regU with the same
// operands.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
 7321 
 7322 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 7323   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7324   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7325 
 7326   ins_cost(200); // XXX
 7327   format %{ "cmovpl  $dst, $src\n\t"
 7328             "cmovnel $dst, $src" %}
 7329   ins_encode %{
 7330     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7331     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7332   %}
 7333   ins_pipe(pipe_cmov_reg);
 7334 %}
 7335 
 7336 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7337 // inputs of the CMove
 7338 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 7339   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7340   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 7341 
 7342   ins_cost(200); // XXX
 7343   format %{ "cmovpl  $dst, $src\n\t"
 7344             "cmovnel $dst, $src" %}
 7345   ins_encode %{
 7346     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7347     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7348   %}
 7349   ins_pipe(pipe_cmov_reg);
 7350 %}
 7351 
 7352 // Conditional move
 7353 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 7354 %{
 7355   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7356 
 7357   ins_cost(200); // XXX
 7358   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 7359   ins_encode %{
 7360     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7361   %}
 7362   ins_pipe(pipe_cmov_reg);  // XXX
 7363 %}
 7364 
 7365 // Conditional move
 7366 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 7367 %{
 7368   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7369 
 7370   ins_cost(200); // XXX
 7371   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 7372   ins_encode %{
 7373     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7374   %}
 7375   ins_pipe(pipe_cmov_reg); // XXX
 7376 %}
 7377 
// Register-to-register CMoveP for the cmpOpUCF/rFlagsRegUCF flavor.
// Has no encoding of its own: it expands into cmovP_regU with the same
// operands.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
 7385 
 7386 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 7387   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7388   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7389 
 7390   ins_cost(200); // XXX
 7391   format %{ "cmovpq  $dst, $src\n\t"
 7392             "cmovneq $dst, $src" %}
 7393   ins_encode %{
 7394     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7395     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7396   %}
 7397   ins_pipe(pipe_cmov_reg);
 7398 %}
 7399 
 7400 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7401 // inputs of the CMove
 7402 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 7403   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7404   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 7405 
 7406   ins_cost(200); // XXX
 7407   format %{ "cmovpq  $dst, $src\n\t"
 7408             "cmovneq $dst, $src" %}
 7409   ins_encode %{
 7410     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7411     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7412   %}
 7413   ins_pipe(pipe_cmov_reg);
 7414 %}
 7415 
 7416 // DISABLED: Requires the ADLC to emit a bottom_type call that
 7417 // correctly meets the two pointer arguments; one is an incoming
 7418 // register but the other is a memory operand.  ALSO appears to
 7419 // be buggy with implicit null checks.
 7420 //
 7421 //// Conditional move
 7422 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
 7423 //%{
 7424 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7425 //  ins_cost(250);
 7426 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7427 //  opcode(0x0F,0x40);
 7428 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7429 //  ins_pipe( pipe_cmov_mem );
 7430 //%}
 7431 //
 7432 //// Conditional move
 7433 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
 7434 //%{
 7435 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7436 //  ins_cost(250);
 7437 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7438 //  opcode(0x0F,0x40);
 7439 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7440 //  ins_pipe( pipe_cmov_mem );
 7441 //%}
 7442 
 7443 instruct cmovL_imm_01(rRegL dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7444 %{
 7445   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7446   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7447 
 7448   ins_cost(100); // XXX
 7449   format %{ "setbn$cop $dst\t# signed, long" %}
 7450   ins_encode %{
 7451     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7452     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7453   %}
 7454   ins_pipe(ialu_reg);
 7455 %}
 7456 
 7457 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 7458 %{
 7459   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7460 
 7461   ins_cost(200); // XXX
 7462   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7463   ins_encode %{
 7464     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7465   %}
 7466   ins_pipe(pipe_cmov_reg);  // XXX
 7467 %}
 7468 
 7469 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 7470 %{
 7471   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7472 
 7473   ins_cost(200); // XXX
 7474   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7475   ins_encode %{
 7476     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7477   %}
 7478   ins_pipe(pipe_cmov_mem);  // XXX
 7479 %}
 7480 
 7481 instruct cmovL_imm_01U(rRegL dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7482 %{
 7483   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7484   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7485 
 7486   ins_cost(100); // XXX
 7487   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7488   ins_encode %{
 7489     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7490     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7491   %}
 7492   ins_pipe(ialu_reg);
 7493 %}
 7494 
 7495 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 7496 %{
 7497   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7498 
 7499   ins_cost(200); // XXX
 7500   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7501   ins_encode %{
 7502     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7503   %}
 7504   ins_pipe(pipe_cmov_reg); // XXX
 7505 %}
 7506 
 7507 instruct cmovL_imm_01UCF(rRegL dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7508 %{
 7509   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7510   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7511 
 7512   ins_cost(100); // XXX
 7513   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7514   ins_encode %{
 7515     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7516     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7517   %}
 7518   ins_pipe(ialu_reg);
 7519 %}
 7520 
// Register-to-register CMoveL for the cmpOpUCF/rFlagsRegUCF flavor.
// Has no encoding of its own: it expands into cmovL_regU with the same
// operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
 7528 
 7529 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 7530   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7531   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7532 
 7533   ins_cost(200); // XXX
 7534   format %{ "cmovpq  $dst, $src\n\t"
 7535             "cmovneq $dst, $src" %}
 7536   ins_encode %{
 7537     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7538     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7539   %}
 7540   ins_pipe(pipe_cmov_reg);
 7541 %}
 7542 
 7543 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7544 // inputs of the CMove
 7545 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 7546   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7547   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7548 
 7549   ins_cost(200); // XXX
 7550   format %{ "cmovpq  $dst, $src\n\t"
 7551             "cmovneq $dst, $src" %}
 7552   ins_encode %{
 7553     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7554     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7555   %}
 7556   ins_pipe(pipe_cmov_reg);
 7557 %}
 7558 
 7559 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 7560 %{
 7561   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7562 
 7563   ins_cost(200); // XXX
 7564   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7565   ins_encode %{
 7566     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7567   %}
 7568   ins_pipe(pipe_cmov_mem); // XXX
 7569 %}
 7570 
// Memory-source CMoveL for the cmpOpUCF/rFlagsRegUCF flavor.
// Has no encoding of its own: it expands into cmovL_memU with the same
// operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
 7578 
 7579 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 7580 %{
 7581   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7582 
 7583   ins_cost(200); // XXX
 7584   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7585             "movss     $dst, $src\n"
 7586     "skip:" %}
 7587   ins_encode %{
 7588     Label Lskip;
 7589     // Invert sense of branch from sense of CMOV
 7590     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7591     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7592     __ bind(Lskip);
 7593   %}
 7594   ins_pipe(pipe_slow);
 7595 %}
 7596 
 7597 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
 7598 // %{
 7599 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
 7600 
 7601 //   ins_cost(200); // XXX
 7602 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7603 //             "movss     $dst, $src\n"
 7604 //     "skip:" %}
 7605 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
 7606 //   ins_pipe(pipe_slow);
 7607 // %}
 7608 
 7609 // Float cmove (unsigned flags): a conditional branch hops over a register copy.
 7610 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src) %{
 7611   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7612   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 7613             "movss     $dst, $src\n"
 7614     "skip:" %}
 7615   ins_cost(200); // XXX
 7616   ins_encode %{
 7617     // Branch on the negated cmove condition (cmpcode with its low bit flipped).
 7618     const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode ^ 1);
 7619     Label done;
 7620     __ jccb(skip_cond, done);
 7621     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7622     __ bind(done);
 7623   %}
 7624   ins_pipe(pipe_slow);
 7625 %}
 7626 
 7627 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{  // UCF-flags twin of cmovF_regU
 7628   ins_cost(200);  // same cost as the rule it expands to
 7629   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7630   expand %{
 7631     cmovF_regU(cop, cr, dst, src);
 7632   %}
 7633 %}
 7634 
 7635 // Double cmove (signed flags): a conditional branch hops over a register copy.
 7636 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src) %{
 7637   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7638   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 7639             "movsd     $dst, $src\n"
 7640     "skip:" %}
 7641   ins_cost(200); // XXX
 7642   ins_encode %{
 7643     // Branch on the negated cmove condition (cmpcode with its low bit flipped).
 7644     const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode ^ 1);
 7645     Label done;
 7646     __ jccb(skip_cond, done);
 7647     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7648     __ bind(done);
 7649   %}
 7650   ins_pipe(pipe_slow);
 7651 %}
 7652 
 7653 // Double cmove (unsigned flags): a conditional branch hops over a register copy.
 7654 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src) %{
 7655   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7656   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 7657             "movsd     $dst, $src\n"
 7658     "skip:" %}
 7659   ins_cost(200); // XXX
 7660   ins_encode %{
 7661     // Branch on the negated cmove condition (cmpcode with its low bit flipped).
 7662     const Assembler::Condition skip_cond = (Assembler::Condition)($cop$$cmpcode ^ 1);
 7663     Label done;
 7664     __ jccb(skip_cond, done);
 7665     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7666     __ bind(done);
 7667   %}
 7668   ins_pipe(pipe_slow);
 7669 %}
 7670 
 7671 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{  // UCF-flags twin of cmovD_regU
 7672   ins_cost(200);  // same cost as the rule it expands to
 7673   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7674   expand %{
 7675     cmovD_regU(cop, cr, dst, src);
 7676   %}
 7677 %}
 7678 
 7679 //----------Arithmetic Instructions--------------------------------------------
 7680 //----------Addition Instructions----------------------------------------------
 7681 
 7682 // int add, register form: dst += src. The flags operand is declared killed.
 7683 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
 7684   match(Set dst (AddI dst src));
 7685   effect(KILL cr);
 7686   format %{ "addl    $dst, $src\t# int" %}
 7687   ins_encode %{
 7688     const Register addend = $src$$Register;
 7689     __ addl($dst$$Register, addend);
 7690   %}
 7691   ins_pipe(ialu_reg_reg);
 7692 %}
 7693 
 7694 // int add, immediate form: dst += constant. The flags operand is declared killed.
 7695 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
 7696   match(Set dst (AddI dst src));
 7697   effect(KILL cr);
 7698   format %{ "addl    $dst, $src\t# int" %}
 7699   ins_encode %{
 7700     const Register target = $dst$$Register;
 7701     __ addl(target, $src$$constant);
 7702   %}
 7703   ins_pipe(ialu_reg);
 7704 %}
 7705 
 7706 // int add with the LoadI folded in: dst += [src]. The flags operand is killed.
 7707 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
 7708   match(Set dst (AddI dst (LoadI src)));
 7709   effect(KILL cr);
 7710   format %{ "addl    $dst, $src\t# int" %}
 7711   ins_cost(150); // XXX
 7712   ins_encode %{
 7713     const Register target = $dst$$Register;
 7714     __ addl(target, $src$$Address);
 7715   %}
 7716   ins_pipe(ialu_reg_mem);
 7717 %}
 7718 
 7719 // int load-add-store on one address folded into a memory-destination add.
 7720 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
 7721   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7722   effect(KILL cr);
 7723   format %{ "addl    $dst, $src\t# int" %}
 7724   ins_cost(150); // XXX
 7725   ins_encode %{
 7726     const Register addend = $src$$Register;
 7727     __ addl($dst$$Address, addend);
 7728   %}
 7729   ins_pipe(ialu_mem_reg);
 7730 %}
 7731 
 7732 // int load-add-store on one address with a constant addend.
 7733 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
 7734   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7735   effect(KILL cr);
 7736   format %{ "addl    $dst, $src\t# int" %}
 7737   ins_cost(125); // XXX
 7738   ins_encode %{
 7739     const Address slot = $dst$$Address;
 7740     __ addl(slot, $src$$constant);
 7741   %}
 7742   ins_pipe(ialu_mem_imm);
 7743 %}
 7744 
 7745 // AddI with the immI_1 operand becomes an increment when UseIncDec is set.
 7746 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr) %{
 7747   match(Set dst (AddI dst src));
 7748   predicate(UseIncDec);
 7749   effect(KILL cr);
 7750   format %{ "incl    $dst\t# int" %}
 7751   ins_encode %{
 7752     const Register target = $dst$$Register;
 7753     __ incrementl(target);
 7754   %}
 7755   ins_pipe(ialu_reg);
 7756 %}
 7757 
 7758 // In-memory int increment: load, AddI with immI_1, store back to the same address.
 7759 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr) %{
 7760   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7761   predicate(UseIncDec);
 7762   effect(KILL cr);
 7763   format %{ "incl    $dst\t# int" %}
 7764   ins_cost(125); // XXX
 7765   ins_encode %{
 7766     const Address slot = $dst$$Address;
 7767     __ incrementl(slot);
 7768   %}
 7769   ins_pipe(ialu_mem_imm);
 7770 %}
 7771 
 7772 // XXX why does that use AddI
 7773 // AddI with the immI_M1 operand becomes a decrement when UseIncDec is set.
 7774 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) %{
 7775   match(Set dst (AddI dst src));
 7776   predicate(UseIncDec);
 7777   effect(KILL cr);
 7778   format %{ "decl    $dst\t# int" %}
 7779   ins_encode %{
 7780     const Register target = $dst$$Register;
 7781     __ decrementl(target);
 7782   %}
 7783   ins_pipe(ialu_reg);
 7784 %}
 7785 
 7786 // XXX why does that use AddI
 7787 // In-memory int decrement: load, AddI with immI_M1, store back to the same address.
 7788 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr) %{
 7789   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7790   predicate(UseIncDec);
 7791   effect(KILL cr);
 7792   format %{ "decl    $dst\t# int" %}
 7793   ins_cost(125); // XXX
 7794   ins_encode %{
 7795     const Address slot = $dst$$Address;
 7796     __ decrementl(slot);
 7797   %}
 7798   ins_pipe(ialu_mem_imm);
 7799 %}
 7800 
 7801 // int shift-and-add folded into one lea without a base register.
 7802 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp) %{
 7803   match(Set dst (AddI (LShiftI index scale) disp));
 7804   predicate(VM_Version::supports_fast_2op_lea());
 7805   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
 7806   ins_encode %{
 7807     const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
 7808     const Address ea(noreg, $index$$Register, shift, $disp$$constant);
 7809     __ leal($dst$$Register, ea);
 7810   %}
 7811   ins_pipe(ialu_reg_reg);
 7812 %}
 7813 
 7814 // int three-operand add (base + index + disp) folded into one lea, scale times_1.
 7815 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp) %{
 7816   match(Set dst (AddI (AddI base index) disp));
 7817   predicate(VM_Version::supports_fast_3op_lea());
 7818   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
 7819   ins_encode %{
 7820     const Address ea($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
 7821     __ leal($dst$$Register, ea);
 7822   %}
 7823   ins_pipe(ialu_reg_reg);
 7824 %}
 7825 
 7826 // int base + (index << scale) folded into one lea; base uses the no_rbp_r13 class.
 7827 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale) %{
 7828   match(Set dst (AddI base (LShiftI index scale)));
 7829   predicate(VM_Version::supports_fast_2op_lea());
 7830   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
 7831   ins_encode %{
 7832     const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
 7833     const Address ea($base$$Register, $index$$Register, shift);
 7834     __ leal($dst$$Register, ea);
 7835   %}
 7836   ins_pipe(ialu_reg_reg);
 7837 %}
 7838 
 7839 // int base + (index << scale) + disp folded into one full-form lea.
 7840 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp) %{
 7841   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
 7842   predicate(VM_Version::supports_fast_3op_lea());
 7843   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
 7844   ins_encode %{
 7845     const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
 7846     const Address ea($base$$Register, $index$$Register, shift, $disp$$constant);
 7847     __ leal($dst$$Register, ea);
 7848   %}
 7849   ins_pipe(ialu_reg_reg);
 7850 %}
 7851 
 7852 // long add, register form: dst += src. The flags operand is declared killed.
 7853 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
 7854   match(Set dst (AddL dst src));
 7855   effect(KILL cr);
 7856   format %{ "addq    $dst, $src\t# long" %}
 7857   ins_encode %{
 7858     const Register addend = $src$$Register;
 7859     __ addq($dst$$Register, addend);
 7860   %}
 7861   ins_pipe(ialu_reg_reg);
 7862 %}
 7863 
 7864 // long add with an immL32 constant: dst += constant. The flags operand is killed.
 7865 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
 7866   match(Set dst (AddL dst src));
 7867   effect(KILL cr);
 7868   format %{ "addq    $dst, $src\t# long" %}
 7869   ins_encode %{
 7870     const Register target = $dst$$Register;
 7871     __ addq(target, $src$$constant);
 7872   %}
 7873   ins_pipe(ialu_reg);
 7874 %}
 7875 
 7876 // long add with the LoadL folded in: dst += [src]. The flags operand is killed.
 7877 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
 7878   match(Set dst (AddL dst (LoadL src)));
 7879   effect(KILL cr);
 7880   format %{ "addq    $dst, $src\t# long" %}
 7881   ins_cost(150); // XXX
 7882   ins_encode %{
 7883     const Register target = $dst$$Register;
 7884     __ addq(target, $src$$Address);
 7885   %}
 7886   ins_pipe(ialu_reg_mem);
 7887 %}
 7888 
 7889 // long load-add-store on one address folded into a memory-destination add.
 7890 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
 7891   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7892   effect(KILL cr);
 7893   format %{ "addq    $dst, $src\t# long" %}
 7894   ins_cost(150); // XXX
 7895   ins_encode %{
 7896     const Register addend = $src$$Register;
 7897     __ addq($dst$$Address, addend);
 7898   %}
 7899   ins_pipe(ialu_mem_reg);
 7900 %}
 7901 
 7902 // long load-add-store on one address with an immL32 addend.
 7903 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
 7904   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7905   effect(KILL cr);
 7906   format %{ "addq    $dst, $src\t# long" %}
 7907   ins_cost(125); // XXX
 7908   ins_encode %{
 7909     const Address slot = $dst$$Address;
 7910     __ addq(slot, $src$$constant);
 7911   %}
 7912   ins_pipe(ialu_mem_imm);
 7913 %}
 7914 
 7915 // long increment: AddL with the immL1 operand, emitted as incrementq when
 7916 // UseIncDec is set. dst is a long register (rRegL) like the other AddL rules.
 7917 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
 7918 %{
 7919   predicate(UseIncDec);
 7920   match(Set dst (AddL dst src));
 7921   effect(KILL cr);
 7922   format %{ "incq    $dst\t# long" %}
 7923   ins_encode %{ __ incrementq($dst$$Register); %}
 7924 
 7925   ins_pipe(ialu_reg);
 7926 %}
 7927 
 7928 // In-memory long increment: load, AddL with immL1, store back to the same address.
 7929 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr) %{
 7930   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7931   predicate(UseIncDec);
 7932   effect(KILL cr);
 7933   format %{ "incq    $dst\t# long" %}
 7934   ins_cost(125); // XXX
 7935   ins_encode %{
 7936     const Address slot = $dst$$Address;
 7937     __ incrementq(slot);
 7938   %}
 7939   ins_pipe(ialu_mem_imm);
 7940 %}
 7941 
 7942 // XXX why does that use AddL
 7943 // AddL with the immL_M1 operand becomes a decrement when UseIncDec is set.
 7944 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr) %{
 7945   match(Set dst (AddL dst src));
 7946   predicate(UseIncDec);
 7947   effect(KILL cr);
 7948   format %{ "decq    $dst\t# long" %}
 7949   ins_encode %{
 7950     const Register target = $dst$$Register;
 7951     __ decrementq(target);
 7952   %}
 7953   ins_pipe(ialu_reg);
 7954 %}
 7955 
 7956 // XXX why does that use AddL
 7957 // In-memory long decrement: load, AddL with immL_M1, store back to the same address.
 7958 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr) %{
 7959   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7960   predicate(UseIncDec);
 7961   effect(KILL cr);
 7962   format %{ "decq    $dst\t# long" %}
 7963   ins_cost(125); // XXX
 7964   ins_encode %{
 7965     const Address slot = $dst$$Address;
 7966     __ decrementq(slot);
 7967   %}
 7968   ins_pipe(ialu_mem_imm);
 7969 %}
 7970 
 7971 // long shift-and-add folded into one lea without a base register.
 7972 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp) %{
 7973   match(Set dst (AddL (LShiftL index scale) disp));
 7974   predicate(VM_Version::supports_fast_2op_lea());
 7975   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
 7976   ins_encode %{
 7977     const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
 7978     const Address ea(noreg, $index$$Register, shift, $disp$$constant);
 7979     __ leaq($dst$$Register, ea);
 7980   %}
 7981   ins_pipe(ialu_reg_reg);
 7982 %}
 7983 
 7984 // long three-operand add (base + index + disp) folded into one lea, scale times_1.
 7985 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp) %{
 7986   match(Set dst (AddL (AddL base index) disp));
 7987   predicate(VM_Version::supports_fast_3op_lea());
 7988   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
 7989   ins_encode %{
 7990     const Address ea($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
 7991     __ leaq($dst$$Register, ea);
 7992   %}
 7993   ins_pipe(ialu_reg_reg);
 7994 %}
 7995 
 7996 // long base + (index << scale) folded into one lea; base uses the no_rbp_r13 class.
 7997 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale) %{
 7998   match(Set dst (AddL base (LShiftL index scale)));
 7999   predicate(VM_Version::supports_fast_2op_lea());
 8000   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
 8001   ins_encode %{
 8002     const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
 8003     const Address ea($base$$Register, $index$$Register, shift);
 8004     __ leaq($dst$$Register, ea);
 8005   %}
 8006   ins_pipe(ialu_reg_reg);
 8007 %}
 8008 
 8009 // long base + (index << scale) + disp folded into one full-form lea.
 8010 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp) %{
 8011   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
 8012   predicate(VM_Version::supports_fast_3op_lea());
 8013   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
 8014   ins_encode %{
 8015     const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
 8016     const Address ea($base$$Register, $index$$Register, shift, $disp$$constant);
 8017     __ leaq($dst$$Register, ea);
 8018   %}
 8019   ins_pipe(ialu_reg_reg);
 8020 %}
 8021 
 8022 // Pointer add with a long register offset: dst += src. The flags operand is killed.
 8023 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr) %{
 8024   match(Set dst (AddP dst src));
 8025   effect(KILL cr);
 8026   format %{ "addq    $dst, $src\t# ptr" %}
 8027   ins_encode %{
 8028     const Register offset = $src$$Register;
 8029     __ addq($dst$$Register, offset);
 8030   %}
 8031   ins_pipe(ialu_reg_reg);
 8032 %}
 8033 
 8034 // Pointer add with an immL32 offset: dst += constant. The flags operand is killed.
 8035 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr) %{
 8036   match(Set dst (AddP dst src));
 8037   effect(KILL cr);
 8038   format %{ "addq    $dst, $src\t# ptr" %}
 8039   ins_encode %{
 8040     const Register target = $dst$$Register;
 8041     __ addq(target, $src$$constant);
 8042   %}
 8043   ins_pipe(ialu_reg);
 8044 %}
 8045 
 8046 // XXX addP mem ops ????
 8047 
 8048 // CheckCastPP is matched in place on dst and emits nothing: the declared size
 8049 // is zero and the encoding is empty.
 8050 instruct checkCastPP(rRegP dst) %{
 8051   match(Set dst (CheckCastPP dst));
 8052   format %{ "# checkcastPP of $dst" %}
 8053   size(0);
 8054   ins_encode(/* empty encoding */);
 8055   ins_pipe(empty);
 8056 %}
 8057 
 8058 // CastPP is matched in place on dst and emits nothing: the declared size is
 8059 // zero and the encoding is empty.
 8060 instruct castPP(rRegP dst) %{
 8061   match(Set dst (CastPP dst));
 8062   format %{ "# castPP of $dst" %}
 8063   size(0);
 8064   ins_encode(/* empty encoding */);
 8065   ins_pipe(empty);
 8066 %}
 8067 
 8068 // CastII is matched in place on dst and emits nothing: zero size, zero cost,
 8069 // empty encoding.
 8070 instruct castII(rRegI dst) %{
 8071   match(Set dst (CastII dst));
 8072   ins_cost(0);
 8073   format %{ "# castII of $dst" %}
 8074   size(0);
 8075   ins_encode(/* empty encoding */);
 8076   ins_pipe(empty);
 8077 %}
 8078 
 8079 // CastLL is matched in place on dst and emits nothing: zero size, zero cost,
 8080 // empty encoding.
 8081 instruct castLL(rRegL dst) %{
 8082   match(Set dst (CastLL dst));
 8083   ins_cost(0);
 8084   format %{ "# castLL of $dst" %}
 8085   size(0);
 8086   ins_encode(/* empty encoding */);
 8087   ins_pipe(empty);
 8088 %}
 8089 
 8090 // CastFF is matched in place on dst and emits nothing: zero size, zero cost,
 8091 // empty encoding.
 8092 instruct castFF(regF dst) %{
 8093   match(Set dst (CastFF dst));
 8094   ins_cost(0);
 8095   format %{ "# castFF of $dst" %}
 8096   size(0);
 8097   ins_encode(/* empty encoding */);
 8098   ins_pipe(empty);
 8099 %}
 8100 
 8101 // CastDD is matched in place on dst and emits nothing: zero size, zero cost,
 8102 // empty encoding.
 8103 instruct castDD(regD dst) %{
 8104   match(Set dst (CastDD dst));
 8105   ins_cost(0);
 8106   format %{ "# castDD of $dst" %}
 8107   size(0);
 8108   ins_encode(/* empty encoding */);
 8109   ins_pipe(empty);
 8110 %}
 8111 
 8112 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 8113 // Pointer CAS; the strong and weak ideal nodes share this rule. oldval uses the
 8114 // rax operand class and is killed; res receives the zero-extended equal flag.
 8115 instruct compareAndSwapP(rRegI res, memory mem_ptr,
 8116                          rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
 8117   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8118   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 8119   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 8120   effect(KILL cr, KILL oldval);
 8121   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8122             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8123             "sete    $res\n\t"
 8124             "movzbl  $res, $res" %}
 8125   ins_encode %{
 8126     const Register result = $res$$Register;
 8127     __ lock();
 8128     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8129     // Turn the equal flag into a 0/1 int in the result register.
 8130     __ setb(Assembler::equal, result);
 8131     __ movzbl(result, result);
 8132   %}
 8133   ins_pipe(pipe_cmpxchg);
 8134 %}
 8135 
 8136 // Long CAS; the strong and weak ideal nodes share this rule. oldval uses the
 8137 // rax operand class and is killed; res receives the zero-extended equal flag.
 8138 instruct compareAndSwapL(rRegI res, memory mem_ptr,
 8139                          rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
 8140   predicate(VM_Version::supports_cx8());
 8141   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 8142   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 8143   effect(KILL cr, KILL oldval);
 8144   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8145             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8146             "sete    $res\n\t"
 8147             "movzbl  $res, $res" %}
 8148   ins_encode %{
 8149     const Register result = $res$$Register;
 8150     __ lock();
 8151     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8152     // Turn the equal flag into a 0/1 int in the result register.
 8153     __ setb(Assembler::equal, result);
 8154     __ movzbl(result, result);
 8155   %}
 8156   ins_pipe(pipe_cmpxchg);
 8157 %}
 8158 
 8159 // Int CAS; the strong and weak ideal nodes share this rule. oldval uses the
 8160 // rax operand class and is killed; res receives the zero-extended equal flag.
 8161 instruct compareAndSwapI(rRegI res, memory mem_ptr,
 8162                          rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
 8163   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 8164   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 8165   effect(KILL cr, KILL oldval);
 8166   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8167             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8168             "sete    $res\n\t"
 8169             "movzbl  $res, $res" %}
 8170   ins_encode %{
 8171     const Register result = $res$$Register;
 8172     __ lock();
 8173     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8174     // Turn the equal flag into a 0/1 int in the result register.
 8175     __ setb(Assembler::equal, result);
 8176     __ movzbl(result, result);
 8177   %}
 8178   ins_pipe(pipe_cmpxchg);
 8179 %}
 8180 
 8181 // Byte CAS; the strong and weak ideal nodes share this rule. oldval uses the
 8182 // rax operand class and is killed; res receives the zero-extended equal flag.
 8183 instruct compareAndSwapB(rRegI res, memory mem_ptr,
 8184                          rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
 8185   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 8186   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 8187   effect(KILL cr, KILL oldval);
 8188   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8189             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8190             "sete    $res\n\t"
 8191             "movzbl  $res, $res" %}
 8192   ins_encode %{
 8193     const Register result = $res$$Register;
 8194     __ lock();
 8195     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 8196     // Turn the equal flag into a 0/1 int in the result register.
 8197     __ setb(Assembler::equal, result);
 8198     __ movzbl(result, result);
 8199   %}
 8200   ins_pipe(pipe_cmpxchg);
 8201 %}
 8202 
 8203 // Short CAS; the strong and weak ideal nodes share this rule. oldval uses the
 8204 // rax operand class and is killed; res receives the zero-extended equal flag.
 8205 instruct compareAndSwapS(rRegI res, memory mem_ptr,
 8206                          rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
 8207   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 8208   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 8209   effect(KILL cr, KILL oldval);
 8210   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8211             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8212             "sete    $res\n\t"
 8213             "movzbl  $res, $res" %}
 8214   ins_encode %{
 8215     const Register result = $res$$Register;
 8216     __ lock();
 8217     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 8218     // Turn the equal flag into a 0/1 int in the result register.
 8219     __ setb(Assembler::equal, result);
 8220     __ movzbl(result, result);
 8221   %}
 8222   ins_pipe(pipe_cmpxchg);
 8223 %}
 8224 
 8225 // Narrow-oop CAS (strong and weak); res receives the zero-extended equal flag.
 8226 instruct compareAndSwapN(rRegI res, memory mem_ptr,
 8227                          rax_RegN oldval, rRegN newval, rFlagsReg cr) %{
 8228   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
 8229   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
 8230   effect(KILL cr, KILL oldval);
 8231   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8232             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8233             "sete    $res\n\t"
 8234             "movzbl  $res, $res" %}
 8235   ins_encode %{
 8236     const Register result = $res$$Register;
 8237     __ lock();
 8238     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8239     // Turn the equal flag into a 0/1 int in the result register.
 8240     __ setb(Assembler::equal, result);
 8241     __ movzbl(result, result);
 8242   %}
 8243   ins_pipe(pipe_cmpxchg);
 8244 %}
 8245 
 8246 // Byte compare-and-exchange. oldval (rax operand class) is both an input and
 8247 // the Set target, so no separate result register is declared.
 8248 instruct compareAndExchangeB(memory mem_ptr,
 8249                              rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
 8250   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 8251   effect(KILL cr);
 8252   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8253             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8254   ins_encode %{
 8255     // Only the locked cmpxchg is emitted; no flag is materialized afterwards.
 8256     const Address slot = $mem_ptr$$Address;
 8257     __ lock();
 8258     __ cmpxchgb($newval$$Register, slot);
 8259   %}
 8260   ins_pipe(pipe_cmpxchg);
 8261 %}
 8262 
 8263 // Short compare-and-exchange. oldval (rax operand class) is both an input and
 8264 // the Set target, so no separate result register is declared.
 8265 instruct compareAndExchangeS(memory mem_ptr,
 8266                              rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
 8267   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 8268   effect(KILL cr);
 8269   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8270             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8271   ins_encode %{
 8272     // Only the locked cmpxchg is emitted; no flag is materialized afterwards.
 8273     const Address slot = $mem_ptr$$Address;
 8274     __ lock();
 8275     __ cmpxchgw($newval$$Register, slot);
 8276   %}
 8277   ins_pipe(pipe_cmpxchg);
 8278 %}
 8279 
 8280 // Int compare-and-exchange. oldval (rax operand class) is both an input and
 8281 // the Set target, so no separate result register is declared.
 8282 instruct compareAndExchangeI(memory mem_ptr,
 8283                              rax_RegI oldval, rRegI newval, rFlagsReg cr) %{
 8284   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 8285   effect(KILL cr);
 8286   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8287             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8288   ins_encode %{
 8289     // Only the locked cmpxchg is emitted; no flag is materialized afterwards.
 8290     const Address slot = $mem_ptr$$Address;
 8291     __ lock();
 8292     __ cmpxchgl($newval$$Register, slot);
 8293   %}
 8294   ins_pipe(pipe_cmpxchg);
 8295 %}
 8296 
 8297 // Long compare-and-exchange. oldval (rax operand class) is both an input and
 8298 // the Set target, so no separate result register is declared.
 8299 instruct compareAndExchangeL(memory mem_ptr,
 8300                              rax_RegL oldval, rRegL newval, rFlagsReg cr) %{
 8301   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 8302   predicate(VM_Version::supports_cx8());
 8303   effect(KILL cr);
 8304   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8305             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8306   ins_encode %{
 8307     // Only the locked cmpxchg is emitted; no flag is materialized afterwards.
 8308     const Address slot = $mem_ptr$$Address;
 8309     __ lock();
 8310     __ cmpxchgq($newval$$Register, slot);
 8311   %}
 8312   ins_pipe(pipe_cmpxchg);
 8313 %}
 8314 
 8315 // Narrow-oop compare-and-exchange. oldval (rax operand class) is both an input
 8316 // and the Set target, so no separate result register is declared.
 8317 instruct compareAndExchangeN(memory mem_ptr,
 8318                              rax_RegN oldval, rRegN newval, rFlagsReg cr) %{
 8319   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
 8320   effect(KILL cr);
 8321   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8322             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8323   ins_encode %{
 8324     const Address slot = $mem_ptr$$Address;
 8325     __ lock();
 8326     __ cmpxchgl($newval$$Register, slot);
 8327   %}
 8328   ins_pipe(pipe_cmpxchg);
 8329 %}
 8330 
 8331 // Pointer compare-and-exchange, selected only when barrier_data() is zero.
 8332 // oldval (rax operand class) is both an input and the Set target.
 8333 instruct compareAndExchangeP(memory mem_ptr,
 8334                              rax_RegP oldval, rRegP newval, rFlagsReg cr) %{
 8335   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 8336   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8337   effect(KILL cr);
 8338   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8339             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8340   ins_encode %{
 8341     // Only the locked cmpxchg is emitted; no flag is materialized afterwards.
 8342     const Address slot = $mem_ptr$$Address;
 8343     __ lock();
 8344     __ cmpxchgq($newval$$Register, slot);
 8345   %}
 8346   ins_pipe(pipe_cmpxchg);
 8347 %}
 8348 
 8349 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{  // byte GetAndAdd, result unused
 8350   match(Set dummy (GetAndAddB mem add));
 8351   predicate(n->as_LoadStore()->result_not_used());  // old value is never read
 8352   effect(KILL cr);
 8353   format %{ "addb_lock   $mem, $add" %}
 8354   ins_encode %{
 8355     __ lock();
 8356     __ addb($mem$$Address, $add$$Register);  // plain add, not xadd: nothing to return
 8357   %}
 8358   ins_pipe(pipe_cmpxchg);
 8359 %}
 8360 
 8361 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{  // byte GetAndAdd of a constant, result unused
 8362   match(Set dummy (GetAndAddB mem add));
 8363   predicate(n->as_LoadStore()->result_not_used());  // old value is never read
 8364   effect(KILL cr);
 8365   format %{ "addb_lock   $mem, $add" %}
 8366   ins_encode %{
 8367     __ lock();
 8368     __ addb($mem$$Address, $add$$constant);  // plain add, not xadd: nothing to return
 8369   %}
 8370   ins_pipe(pipe_cmpxchg);
 8371 %}
 8372 
 8373 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{  // byte GetAndAdd whose result is used
 8374   match(Set newval (GetAndAddB mem newval));  // newval is both addend and result
 8375   predicate(!n->as_LoadStore()->result_not_used());
 8376   effect(KILL cr);
 8377   format %{ "xaddb_lock  $mem, $newval" %}
 8378   ins_encode %{
 8379     __ lock();
 8380     __ xaddb($mem$$Address, $newval$$Register);
 8381   %}
 8382   ins_pipe(pipe_cmpxchg);
 8383 %}
 8384 
 8385 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{  // short GetAndAdd, result unused
 8386   match(Set dummy (GetAndAddS mem add));
 8387   predicate(n->as_LoadStore()->result_not_used());  // old value is never read
 8388   effect(KILL cr);
 8389   format %{ "addw_lock   $mem, $add" %}
 8390   ins_encode %{
 8391     __ lock();
 8392     __ addw($mem$$Address, $add$$Register);  // plain add, not xadd: nothing to return
 8393   %}
 8394   ins_pipe(pipe_cmpxchg);
 8395 %}
 8396 
 8397 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{  // short GetAndAdd of a constant, result unused
 8398   match(Set dummy (GetAndAddS mem add));
 8399   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());  // also gated on UseStoreImmI16
 8400   effect(KILL cr);
 8401   format %{ "addw_lock   $mem, $add" %}
 8402   ins_encode %{
 8403     __ lock();
 8404     __ addw($mem$$Address, $add$$constant);  // plain add, not xadd: nothing to return
 8405   %}
 8406   ins_pipe(pipe_cmpxchg);
 8407 %}
 8408 
 8409 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{  // short GetAndAdd whose result is used
 8410   match(Set newval (GetAndAddS mem newval));  // newval is both addend and result
 8411   predicate(!n->as_LoadStore()->result_not_used());
 8412   effect(KILL cr);
 8413   format %{ "xaddw_lock  $mem, $newval" %}
 8414   ins_encode %{
 8415     __ lock();
 8416     __ xaddw($mem$$Address, $newval$$Register);
 8417   %}
 8418   ins_pipe(pipe_cmpxchg);
 8419 %}
 8420 
 8421 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{  // int GetAndAdd, result unused
 8422   match(Set dummy (GetAndAddI mem add));
 8423   predicate(n->as_LoadStore()->result_not_used());  // old value is never read
 8424   effect(KILL cr);
 8425   format %{ "addl_lock   $mem, $add" %}
 8426   ins_encode %{
 8427     __ lock();
 8428     __ addl($mem$$Address, $add$$Register);  // plain add, not xadd: nothing to return
 8429   %}
 8430   ins_pipe(pipe_cmpxchg);
 8431 %}
 8432 
 8433 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{  // int GetAndAdd of a constant, result unused
 8434   match(Set dummy (GetAndAddI mem add));
 8435   predicate(n->as_LoadStore()->result_not_used());  // old value is never read
 8436   effect(KILL cr);
 8437   format %{ "addl_lock   $mem, $add" %}
 8438   ins_encode %{
 8439     __ lock();
 8440     __ addl($mem$$Address, $add$$constant);  // plain add, not xadd: nothing to return
 8441   %}
 8442   ins_pipe(pipe_cmpxchg);
 8443 %}
 8444 
 8445 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{  // int GetAndAdd whose result is used
 8446   match(Set newval (GetAndAddI mem newval));  // newval is both addend and result
 8447   predicate(!n->as_LoadStore()->result_not_used());
 8448   effect(KILL cr);
 8449   format %{ "xaddl_lock  $mem, $newval" %}
 8450   ins_encode %{
 8451     __ lock();
 8452     __ xaddl($mem$$Address, $newval$$Register);
 8453   %}
 8454   ins_pipe(pipe_cmpxchg);
 8455 %}
 8456 
 8457 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{  // long GetAndAdd, result unused
 8458   match(Set dummy (GetAndAddL mem add));
 8459   predicate(n->as_LoadStore()->result_not_used());  // old value is never read
 8460   effect(KILL cr);
 8461   format %{ "addq_lock   $mem, $add" %}
 8462   ins_encode %{
 8463     __ lock();
 8464     __ addq($mem$$Address, $add$$Register);  // plain add, not xadd: nothing to return
 8465   %}
 8466   ins_pipe(pipe_cmpxchg);
 8467 %}
 8468 
 8469 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{  // long GetAndAdd of an immL32, result unused
 8470   match(Set dummy (GetAndAddL mem add));
 8471   predicate(n->as_LoadStore()->result_not_used());  // old value is never read
 8472   effect(KILL cr);
 8473   format %{ "addq_lock   $mem, $add" %}
 8474   ins_encode %{
 8475     __ lock();
 8476     __ addq($mem$$Address, $add$$constant);  // plain add, not xadd: nothing to return
 8477   %}
 8478   ins_pipe(pipe_cmpxchg);
 8479 %}
 8480 
 8481 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{  // long GetAndAdd whose result is used
 8482   match(Set newval (GetAndAddL mem newval));  // newval is both addend and result
 8483   predicate(!n->as_LoadStore()->result_not_used());
 8484   effect(KILL cr);
 8485   format %{ "xaddq_lock  $mem, $newval" %}
 8486   ins_encode %{
 8487     __ lock();
 8488     __ xaddq($mem$$Address, $newval$$Register);
 8489   %}
 8490   ins_pipe(pipe_cmpxchg);
 8491 %}
 8492 
 8493 instruct xchgB(memory mem, rRegI newval) %{  // byte GetAndSet
 8494   match(Set newval (GetAndSetB mem newval));  // newval carries the new value in, the result out
 8495   format %{ "XCHGB  $newval,[$mem]" %}
 8496   ins_encode %{
 8497     __ xchgb($newval$$Register, $mem$$Address);  // no lock() call and no flags effect in this rule
 8498   %}
 8499   ins_pipe(pipe_cmpxchg);
 8500 %}
 8501 
 8502 instruct xchgS(memory mem, rRegI newval) %{  // short GetAndSet
 8503   match(Set newval (GetAndSetS mem newval));  // newval carries the new value in, the result out
 8504   format %{ "XCHGW  $newval,[$mem]" %}
 8505   ins_encode %{
 8506     __ xchgw($newval$$Register, $mem$$Address);  // no lock() call and no flags effect in this rule
 8507   %}
 8508   ins_pipe(pipe_cmpxchg);
 8509 %}
 8510 
 8511 instruct xchgI(memory mem, rRegI newval) %{  // int GetAndSet
 8512   match(Set newval (GetAndSetI mem newval));  // newval carries the new value in, the result out
 8513   format %{ "XCHGL  $newval,[$mem]" %}
 8514   ins_encode %{
 8515     __ xchgl($newval$$Register, $mem$$Address);  // no lock() call and no flags effect in this rule
 8516   %}
 8517   ins_pipe(pipe_cmpxchg);
 8518 %}
 8519 
 8520 instruct xchgL( memory mem, rRegL newval) %{  // long GetAndSet
 8521   match(Set newval (GetAndSetL mem newval));  // newval carries the new value in, the result out
 8522   format %{ "XCHGQ  $newval,[$mem]" %}  // mnemonic now matches the xchgq emitted below
 8523   ins_encode %{
 8524     __ xchgq($newval$$Register, $mem$$Address);
 8525   %}
 8526   ins_pipe( pipe_cmpxchg );
 8527 %}
 8528 
 8529 instruct xchgP(memory mem, rRegP newval) %{  // pointer GetAndSet
 8530   predicate(n->as_LoadStore()->barrier_data() == 0);  // only nodes with zero barrier data
 8531   match(Set newval (GetAndSetP mem newval));  // newval carries the new value in, the result out
 8532   format %{ "XCHGQ  $newval,[$mem]" %}
 8533   ins_encode %{
 8534     __ xchgq($newval$$Register, $mem$$Address);  // no lock() call and no flags effect in this rule
 8535   %}
 8536   ins_pipe(pipe_cmpxchg);
 8537 %}
 8538 
 8539 instruct xchgN( memory mem, rRegN newval) %{  // narrow-oop GetAndSet
 8540   match(Set newval (GetAndSetN mem newval));  // newval carries the new value in, the result out
 8541   format %{ "XCHGL  $newval,[$mem]" %}  // opening bracket restored, as in the sibling xchg rules
 8542   ins_encode %{
 8543     __ xchgl($newval$$Register, $mem$$Address);
 8544   %}
 8545   ins_pipe( pipe_cmpxchg );
 8546 %}
 8547 
 8548 //----------Abs Instructions-------------------------------------------
 8549 
 8550 // Integer Absolute Instructions
 8551 // int abs: dst = 0 - src, then src is moved back in when that compare is less.
 8552 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
 8553   match(Set dst (AbsI src));
 8554   effect(TEMP dst, KILL cr);
 8555   format %{ "xorl    $dst, $dst\t# abs int\n\t"
 8556             "subl    $dst, $src\n\t"
 8557             "cmovll  $dst, $src" %}
 8558   ins_encode %{
 8559     const Register acc = $dst$$Register;
 8560     __ xorl(acc, acc);
 8561     __ subl(acc, $src$$Register);
 8562     __ cmovl(Assembler::less, acc, $src$$Register);
 8563   %}
 8564   ins_pipe(ialu_reg_reg);
 8565 %}
 8566 
 8567 // Long Absolute Instructions
// Matches AbsL. dst is declared TEMP and the flags register is killed.
// Same shape as the int rule, using subq/cmovq: clear dst (with a 32-bit
// xorl), compute dst = 0 - src, then conditionally move src into dst on the
// 'less' condition left by the subtraction.
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl    $dst, $dst\t# abs long\n\t"
            "subq    $dst, $src\n\t"
            "cmovlq  $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subq($dst$$Register, $src$$Register);
    __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
 8583 
 8584 //----------Subtraction Instructions-------------------------------------------
 8585 
 8586 // Integer Subtraction Instructions
// Matches SubI with dst as both the left input and the result (two-operand
// form). Encoded as register-register subl; the flags register is killed.
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 8598 
// Matches SubI whose right input is a LoadI, folding the load into the
// subtract: subl with a register destination and a memory source.
// Cost 150; the flags register is killed.
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 8611 
// Matches a StoreI of (LoadI dst) - src back to the same memory operand,
// i.e. a load/subtract/store on one address, encoded as subl with a memory
// destination. Cost 150; the flags register is killed.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 8624 
// Matches SubL with dst as both the left input and the result.
// Encoded as register-register subq; the flags register is killed.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (SubL dst src));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 8636 
// Matches SubL whose right input is a LoadL, folding the load into the
// subtract: subq with a register destination and a memory source.
// Cost 150; the flags register is killed.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 8649 
// Matches a StoreL of (LoadL dst) - src back to the same memory operand,
// encoded as subq with a memory destination.
// Cost 150; the flags register is killed.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 8662 
 8663 // Subtract from a pointer
 8664 // XXX hmpf???
// Matches AddP of dst and (0 - src), i.e. pointer minus int, with dst as both
// input and result. Unlike its neighbours this rule uses the opcode/enc_class
// form (opcode 0x2B with REX_reg_reg_wide, OpcP and reg_reg, all defined
// outside this chunk) instead of an assembler call. Flags are killed.
// NOTE(review): src is an int register while the format shows a 64-bit subq;
// confirm how the upper half of src is expected to be defined.
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# ptr - int" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}
 8675 
// Matches SubI of the constant zero operand and dst (0 - dst), with dst as
// the result. Encoded as negl on the register; the flags register is killed.
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 8687 
// Matches an explicit NegI node (as opposed to the 0 - dst shape handled by
// negI_rReg). Encoded as negl on the register; the flags register is killed.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
  match(Set dst (NegI dst));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 8699 
// Matches a StoreI of 0 - (LoadI dst) back to the same memory operand,
// encoded as negl with a memory operand. The flags register is killed.
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
 8711 
// Matches SubL of the constant zero operand and dst (0 - dst), with dst as
// the result. Encoded as negq on the register; the flags register is killed.
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (SubL zero dst));
  effect(KILL cr);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 8723 
 8724 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
 8725 %{
 8726   match(Set dst (NegL dst));
 8727   effect(KILL cr);
 8728 
 8729   format %{ "negq    $dst\t# int" %}
 8730   ins_encode %{
 8731     __ negq($dst$$Register);
 8732   %}
 8733   ins_pipe(ialu_reg);
 8734 %}
 8735 
// Matches a StoreL of 0 - (LoadL dst) back to the same memory operand,
// encoded as negq with a memory operand. The flags register is killed.
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
 8747 
 8748 //----------Multiplication/Division Instructions-------------------------------
 8749 // Integer Multiplication Instructions
 8750 // Multiply Register
 8751 
// Matches MulI with dst as both the left input and the result.
// Encoded as two-operand imull; cost 300; the flags register is killed.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
 8764 
// Matches MulI of a register and an int constant. Uses the three-operand
// imull form, so dst is separate from src. Cost 300; flags are killed.
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
 8777 
// Matches MulI whose right input is a LoadI, folding the load into imull
// with a memory source. dst is both input and result. Cost 350; flags killed.
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imull   $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
 8790 
// Matches MulI of a LoadI and an int constant: three-operand imull with a
// memory source and an immediate. Cost 300; the flags register is killed.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
 8803 
// Matches MulAddS2I over the pairs (dst, src1) and (src2, src3). It has no
// encoding of its own: it expands into dst *= src1, src2 *= src3,
// dst += src2 using mulI_rReg and addI_rReg (the latter is defined outside
// this chunk). src2 is overwritten by the second multiply, hence KILL src2.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_rReg(dst, src1, cr);
           mulI_rReg(src2, src3, cr);
           addI_rReg(dst, src2, cr); %}
%}
 8813 
// Matches MulL with dst as both the left input and the result.
// Encoded as two-operand imulq; cost 300; the flags register is killed.
instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (MulL dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
 8826 
// Matches MulL of a register and an immL32 constant. Uses the three-operand
// imulq form, so dst is separate from src. Cost 300; flags are killed.
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
 8839 
// Matches MulL whose right input is a LoadL, folding the load into imulq
// with a memory source. dst is both input and result. Cost 350; flags killed.
instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imulq   $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
 8852 
// Matches MulL of a LoadL and an immL32 constant: three-operand imulq with a
// memory source and an immediate. Cost 300; the flags register is killed.
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL (LoadL src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
 8865 
// Matches MulHiL (high half of a signed 64x64 multiply). The result operand
// is constrained to the rdx_RegL class and one input to rax_RegL; rax is both
// used and killed. Encoded as the one-operand imulq on src. Cost 300.
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (MulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{
    __ imulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
 8878 
// Matches UMulHiL (high half of an unsigned 64x64 multiply). Same operand
// constraints as mulHiL_rReg: result in the rdx_RegL class, rax used and
// killed. Encoded as the one-operand mulq on src. Cost 300.
instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (UMulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{
    __ mulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
 8891 
// Matches DivI with the dividend and quotient in the rax_RegI class; the
// divisor may not be rax or rdx. rdx and the flags register are killed.
// The encoding comes from cdql_enc (defined outside this chunk). Per the
// format text it special-cases dividend 0x80000000 with divisor -1 by
// skipping the idivl, and otherwise runs cdql followed by idivl.
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
 8910 
// Matches DivL with the dividend and quotient in the rax_RegL class; the
// divisor may not be rax or rdx. rdx and the flags register are killed.
// The encoding comes from cdqq_enc (defined outside this chunk). Per the
// format text it special-cases dividend 0x8000000000000000 with divisor -1
// by skipping the idivq, and otherwise runs cdqq followed by idivq.
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
 8930 
// Matches UDivI (unsigned int divide) with dividend and quotient in the
// rax_RegI class. rdx and the flags register are killed. The sequence is
// produced by the assembler helper udivI(rax, div, rdx). Cost 300.
instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rax (UDivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{
    __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
 8943 
// Matches UDivL (unsigned long divide) with dividend and quotient in the
// rax_RegL class. rdx and the flags register are killed. The sequence is
// produced by the assembler helper udivL(rax, div, rdx). Cost 300.
instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rax (UDivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{
     __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
 8956 
 8957 // Integer DIVMOD with Register, both quotient and mod results
// Matches DivModI directly (no Set in the match rule). Both rax and rdx are
// operands and, unlike divI_rReg and modI_rReg, neither is declared killed;
// only the flags register is. Uses the same cdql_enc encoding and format
// text as divI_rReg, with the pipe_slow pipeline class.
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                             rFlagsReg cr)
%{
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}
 8976 
 8977 // Long DIVMOD with Register, both quotient and mod results
// Matches DivModL directly (no Set in the match rule). Both rax and rdx are
// operands and neither is declared killed; only the flags register is.
// Uses the same cdqq_enc encoding and format text as divL_rReg, with the
// pipe_slow pipeline class.
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                             rFlagsReg cr)
%{
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}
 8997 
 8998 // Unsigned integer DIVMOD with Register, both quotient and mod results
// Matches UDivModI directly (no Set in the match rule); rax and rdx are both
// operands. Needs one extra TEMP register that is neither rax nor rdx.
// The sequence is produced by the assembler helper
// udivmodI(rax, div, rdx, tmp). Flags are killed. Cost 300.
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
                              no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(UDivModI rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
          %}
  ins_encode %{
    __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
 9014 
 9015 // Unsigned long DIVMOD with Register, both quotient and mod results
// Matches UDivModL directly (no Set in the match rule); rax and rdx are both
// operands. Needs one extra TEMP register that is neither rax nor rdx.
// The sequence is produced by the assembler helper
// udivmodL(rax, div, rdx, tmp). Flags are killed. Cost 300.
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
                              no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(UDivModL rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
          %}
  ins_encode %{
    __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
 9031 
// Matches ModI with the dividend in the rax_RegI class and the remainder
// produced in the rdx_RegI class. rax and the flags register are killed.
// Shares the cdql_enc encoding (defined outside this chunk) with divI_rReg;
// the format text shows the same special case for 0x80000000 and -1.
instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
 9050 
// Matches ModL with the dividend in the rax_RegL class and the remainder
// produced in the rdx_RegL class. rax and the flags register are killed.
// Shares the cdqq_enc encoding (defined outside this chunk) with divL_rReg;
// the format text shows the same special case for the minimum long and -1.
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
 9070 
// Matches UModI (unsigned int remainder) with the dividend in the rax_RegI
// class and the result in the rdx_RegI class. rax and flags are killed.
// The sequence is produced by the assembler helper umodI(rax, div, rdx).
instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rdx (UModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{
    __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
 9083 
// Matches UModL (unsigned long remainder) with the dividend in the rax_RegL
// class and the result in the rdx_RegL class. rax and flags are killed.
// The sequence is produced by the assembler helper umodL(rax, div, rdx).
instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rdx (UModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{
    __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
 9096 
 9097 // Integer Shift Instructions
 9098 // Shift Left by one, two, three
// Matches LShiftI of dst by an immI2 constant, in place. Encoded as sall
// with an immediate count; the flags register is killed.
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9110 
 9111 // Shift Left by 8-bit immediate
// Matches LShiftI of dst by an immI8 constant, in place. Encoded as sall
// with an immediate count; the flags register is killed.
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9123 
 9124 // Shift Left by 8-bit immediate
// Matches a StoreI of (LoadI dst) shifted left by an immI8 constant back to
// the same memory operand. Encoded as sall on memory with an immediate
// count; the flags register is killed.
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 9136 
 9137 // Shift Left by variable
// Matches LShiftI of dst by a variable count, only when BMI2 is not
// supported. The count operand is constrained to the rcx_RegI class and the
// assembler call takes only dst. The flags register is killed.
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9150 
 9151 // Shift Left by variable
// Matches a StoreI of (LoadI dst) shifted left by a variable count back to
// the same memory operand, only when BMI2 is not supported. The count is
// constrained to the rcx_RegI class and the assembler call takes only the
// address. The flags register is killed.
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9164 
// BMI2 variant of the variable int left shift. Three-operand shlxl with
// separate dst, src and count registers; the count may be any rRegI.
// No flags effect is declared for this rule.
instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI src shift));

  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9176 
// BMI2 variant that folds a LoadI into the variable int left shift: shlxl
// with a memory source and a register count. Cost 175. No flags effect is
// declared for this rule.
instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9188 
 9189 // Arithmetic Shift Right by 8-bit immediate
// Matches RShiftI (arithmetic right shift) of dst by an immI8 constant, in
// place. Encoded as sarl with an immediate count; flags are killed.
// NOTE(review): the pipeline class is ialu_mem_imm although dst is a
// register; shrI_rReg_imm and salI_rReg_imm use ialu_reg. Confirm intent.
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 9201 
 9202 // Arithmetic Shift Right by 8-bit immediate
// Matches a StoreI of (LoadI dst) arithmetically shifted right by an immI8
// constant back to the same memory operand. Encoded as sarl on memory with
// an immediate count; the flags register is killed.
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 9214 
 9215 // Arithmetic Shift Right by variable
// Matches RShiftI of dst by a variable count, only when BMI2 is not
// supported. The count operand is constrained to the rcx_RegI class and the
// assembler call takes only dst. The flags register is killed.
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);
  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9227 
 9228 // Arithmetic Shift Right by variable
// Matches a StoreI of (LoadI dst) arithmetically shifted right by a variable
// count back to the same memory operand, only when BMI2 is not supported.
// The count is constrained to the rcx_RegI class. Flags are killed.
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9241 
// BMI2 variant of the variable int arithmetic right shift. Three-operand
// sarxl with separate dst, src and count registers. No flags effect is
// declared for this rule.
instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI src shift));

  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9253 
// BMI2 variant that folds a LoadI into the variable int arithmetic right
// shift: sarxl with a memory source and a register count. Cost 175.
// No flags effect is declared for this rule.
instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9265 
 9266 // Logical Shift Right by 8-bit immediate
// Matches URShiftI (logical right shift) of dst by an immI8 constant, in
// place. Encoded as shrl with an immediate count; flags are killed.
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9278 
 9279 // Logical Shift Right by 8-bit immediate
// Matches a StoreI of (LoadI dst) logically shifted right by an immI8
// constant back to the same memory operand. Encoded as shrl on memory with
// an immediate count; the flags register is killed.
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 9291 
 9292 // Logical Shift Right by variable
// Matches URShiftI of dst by a variable count, only when BMI2 is not
// supported. The count operand is constrained to the rcx_RegI class and the
// assembler call takes only dst. The flags register is killed.
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9305 
 9306 // Logical Shift Right by variable
// Matches a StoreI of (LoadI dst) logically shifted right by a variable
// count back to the same memory operand, only when BMI2 is not supported.
// The count is constrained to the rcx_RegI class. Flags are killed.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9319 
// BMI2 variant of the variable int logical right shift. Three-operand shrxl
// with separate dst, src and count registers. No flags effect is declared
// for this rule.
instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9331 
// BMI2 variant that folds a LoadI into the variable int logical right shift:
// shrxl with a memory source and a register count. Cost 175. No flags effect
// is declared for this rule.
instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9343 
 9344 // Long Shift Instructions
 9345 // Shift Left by one, two, three
// Matches LShiftL of dst by an immI2 constant, in place. Encoded as salq
// with an immediate count; the flags register is killed.
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9357 
 9358 // Shift Left by 8-bit immediate
// Matches LShiftL of dst by an immI8 constant, in place. Encoded as salq
// with an immediate count; the flags register is killed.
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9370 
 9371 // Shift Left by 8-bit immediate
// Matches a StoreL of (LoadL dst) shifted left by an immI8 constant back to
// the same memory operand. Encoded as salq on memory with an immediate
// count; the flags register is killed.
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 9383 
 9384 // Shift Left by variable
// Matches LShiftL of dst by a variable count, only when BMI2 is not
// supported. The count is an int operand constrained to the rcx_RegI class
// and the assembler call takes only dst. The flags register is killed.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9397 
 9398 // Shift Left by variable
// Matches a StoreL of (LoadL dst) shifted left by a variable count back to
// the same memory operand, only when BMI2 is not supported. The count is
// constrained to the rcx_RegI class. The flags register is killed.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9411 
// BMI2 variant of the variable long left shift. Three-operand shlxq with
// separate dst, src and (int) count registers. No flags effect is declared
// for this rule.
instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9423 
// BMI2 variant that folds a LoadL into the variable long left shift: shlxq
// with a memory source and a register count. Cost 175. No flags effect is
// declared for this rule.
instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9435 
// Arithmetic Shift Right by immediate (immI operand; the count is masked with 0x3F when encoded)
// Matches RShiftL (arithmetic right shift) of dst by an immI constant, in
// place. The constant is masked with 0x3F and narrowed to unsigned char
// before being passed to sarq. The flags register is killed.
// NOTE(review): the pipeline class is ialu_mem_imm although dst is a
// register; shrL_rReg_imm and salL_rReg_imm use ialu_reg. Confirm intent.
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}
 9448 
// Arithmetic Shift Right by immediate (immI operand; the count is masked with 0x3F when encoded)
// Matches a StoreL of (LoadL dst) arithmetically shifted right by an immI
// constant back to the same memory operand. The constant is masked with 0x3F
// and narrowed to unsigned char before being passed to sarq. Flags killed.
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}
 9461 
 9462 // Arithmetic Shift Right by variable
// Matches RShiftL of dst by a variable count, only when BMI2 is not
// supported. The count is an int operand constrained to the rcx_RegI class
// and the assembler call takes only dst. The flags register is killed.
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9475 
 9476 // Arithmetic Shift Right by variable
// Matches a StoreL of (LoadL dst) arithmetically shifted right by a variable
// count back to the same memory operand, only when BMI2 is not supported.
// The count is constrained to the rcx_RegI class. Flags are killed.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9489 
// BMI2 variant of the variable long arithmetic right shift. Three-operand
// sarxq with separate dst, src and (int) count registers. No flags effect is
// declared for this rule.
instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL src shift));

  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9501 
// BMI2 variant that folds a LoadL into the variable long arithmetic right
// shift: sarxq with a memory source and a register count. Cost 175.
// No flags effect is declared for this rule.
instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9513 
 9514 // Logical Shift Right by 8-bit immediate
// Matches URShiftL (logical right shift) of dst by an immI8 constant, in
// place. Encoded as shrq with an immediate count; flags are killed.
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9526 
 9527 // Logical Shift Right by 8-bit immediate
// Matches a StoreL of (LoadL dst) logically shifted right by an immI8
// constant back to the same memory operand. Encoded as shrq on memory with
// an immediate count; the flags register is killed.
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 9539 
 9540 // Logical Shift Right by variable
// Matches URShiftL of dst by a variable count, only when BMI2 is not
// supported. The count is an int operand constrained to the rcx_RegI class
// and the assembler call takes only dst. The flags register is killed.
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9553 
 9554 // Logical Shift Right by variable
// Matches a StoreL of (LoadL dst) logically shifted right by a variable
// count back to the same memory operand, only when BMI2 is not supported.
// The count is constrained to the rcx_RegI class. Flags are killed.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9567 
// BMI2 variant of the variable long logical right shift. Three-operand shrxq
// with separate dst, src and (int) count registers. No flags effect is
// declared for this rule.
instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL src shift));

  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9579 
// BMI2 variant that folds a LoadL into the variable long logical right
// shift: shrxq with a memory source and a register count. Cost 175.
// No flags effect is declared for this rule.
instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9591 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Collapses the pattern RShiftI(LShiftI(src, 24), 24) into one sign-extending
// byte move (movsbl). No flags effect is declared for this rule.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl  $dst, $src\t# i2b" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9604 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  // The matched tree is RShiftI(LShiftI(src, c), c) with both shift counts
  // bound to the same immI_16 operand; it is replaced by a single movswl
  // from src to dst. No flags operand is declared.
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl  $dst, $src\t# i2s" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9617 
 9618 // ROL/ROR instructions
 9619 
 9620 // Rotate left by constant.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  // Non-BMI2 rotate-left of an int by an 8-bit immediate, performed in place
  // (dst is both input and result). The T_INT check is needed because the
  // long rules in this file (rolL_*) match the same RotateLeft node.
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9632 
instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  // BMI2 rotate-left of an int by a constant, with separate dst and src and
  // no flags operand. The format string prints "rolxl", but the encoding
  // emits rorxl with the complementary count.
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    // Rotate left by k is expressed as rotate right by 32 - (k & 31).
    // NOTE(review): when (k & 31) == 0 the count passed is 32; this relies on
    // rorxl treating the count modulo 32 -- confirm.
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9644 
instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  // BMI2 rotate-left of an int by a constant with the LoadI of the rotated
  // value folded into the memory source operand; ins_cost(175). The format
  // string prints "rolxl", but the encoding emits rorxl.
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft (LoadI src) shift));
  ins_cost(175);
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    // Rotate left by k is expressed as rotate right by 32 - (k & 31).
    // NOTE(review): when (k & 31) == 0 the count passed is 32; this relies on
    // rorxl treating the count modulo 32 -- confirm.
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9657 
 9658 // Rotate Left by variable
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Variable-count rotate-left of an int, in place. The predicate checks only
  // the node type, so this rule is not gated on BMI2.
  // The count operand is of class rcx_RegI and is not passed to the assembler
  // call (presumably the implicit-CL encoding -- confirm in Assembler::roll).
  predicate(n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9670 
 9671 // Rotate Right by constant.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  // Non-BMI2 rotate-right of an int by an 8-bit immediate, performed in place
  // (dst is both input and result). The T_INT check is needed because the
  // long rules in this file (rorL_*) match the same RotateRight node.
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9683 
 9684 // Rotate Right by constant.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  // BMI2 rotate-right of an int by a constant: separate dst and src, no flags
  // operand; the immediate is passed to rorxl unchanged.
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9695 
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  // BMI2 rotate-right of an int by a constant with the LoadI of the rotated
  // value folded into rorxl's memory source operand; ins_cost(175).
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight (LoadI src) shift));
  ins_cost(175);
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9707 
 9708 // Rotate Right by variable
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Variable-count rotate-right of an int, in place. The predicate checks
  // only the node type, so this rule is not gated on BMI2.
  // The count operand is of class rcx_RegI and is not passed to the assembler
  // call (presumably the implicit-CL encoding -- confirm in Assembler::rorl).
  predicate(n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9720 
 9721 // Rotate Left by constant.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  // Non-BMI2 rotate-left of a long by an 8-bit immediate, performed in place
  // (dst is both input and result). The T_LONG check separates this rule from
  // the int rules (rolI_*) that match the same RotateLeft node.
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9733 
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  // BMI2 rotate-left of a long by a constant, with separate dst and src and
  // no flags operand. The format string prints "rolxq", but the encoding
  // emits rorxq with the complementary count.
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    // Rotate left by k is expressed as rotate right by 64 - (k & 63).
    // NOTE(review): when (k & 63) == 0 the count passed is 64; this relies on
    // rorxq treating the count modulo 64 -- confirm.
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9745 
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  // BMI2 rotate-left of a long by a constant with the LoadL of the rotated
  // value folded into the memory source operand; ins_cost(175). The format
  // string prints "rolxq", but the encoding emits rorxq.
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft (LoadL src) shift));
  ins_cost(175);
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    // Rotate left by k is expressed as rotate right by 64 - (k & 63).
    // NOTE(review): when (k & 63) == 0 the count passed is 64; this relies on
    // rorxq treating the count modulo 64 -- confirm.
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9758 
 9759 // Rotate Left by variable
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Variable-count rotate-left of a long, in place. The predicate checks only
  // the node type, so this rule is not gated on BMI2.
  // The count operand is of class rcx_RegI and is not passed to the assembler
  // call (presumably the implicit-CL encoding -- confirm in Assembler::rolq).
  predicate(n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9771 
 9772 // Rotate Right by constant.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  // Non-BMI2 rotate-right of a long by an 8-bit immediate, performed in place
  // (dst is both input and result). The T_LONG check separates this rule from
  // the int rules (rorI_*) that match the same RotateRight node.
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9784 
 9785 // Rotate Right by constant
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  // BMI2 rotate-right of a long by a constant: separate dst and src, no flags
  // operand; the immediate is passed to rorxq unchanged.
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9796 
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  // BMI2 rotate-right of a long by a constant with the LoadL of the rotated
  // value folded into rorxq's memory source operand; ins_cost(175).
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight (LoadL src) shift));
  ins_cost(175);
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9808 
 9809 // Rotate Right by variable
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  // Variable-count rotate-right of a long, in place. The predicate checks
  // only the node type, so this rule is not gated on BMI2.
  // The count operand is of class rcx_RegI and is not passed to the assembler
  // call (presumably the implicit-CL encoding -- confirm in Assembler::rorq).
  predicate(n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9821 
 9822 //----------------------------- CompressBits/ExpandBits ------------------------
 9823 
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  // CompressBits on a long-typed node with all operands in registers is
  // emitted as pextq. No flags operand is declared.
  // NOTE(review): the predicate checks only the node type, not a CPU feature;
  // presumably the node is only created when pext is usable -- confirm.
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
 9833 
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  // ExpandBits on a long-typed node with all operands in registers is
  // emitted as pdepq. No flags operand is declared.
  // NOTE(review): the predicate checks only the node type, not a CPU feature;
  // presumably the node is only created when pdep is usable -- confirm.
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
 9843 
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  // Variant of compressBitsL_reg in which the mask comes from a LoadL that is
  // folded into pextq's memory operand.
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src (LoadL mask)));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
 9853 
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  // Variant of expandBitsL_reg in which the mask comes from a LoadL that is
  // folded into pdepq's memory operand.
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src (LoadL mask)));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
 9863 
 9864 
 9865 // Logical Instructions
 9866 
 9867 // Integer Logical Instructions
 9868 
 9869 // And Instructions
 9870 // And Register with Register
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  // Two-operand int AND: dst = dst & src, with dst as both input and result.
  // The flags register operand cr is declared killed.
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9882 
 9883 // And Register with Immediate 255
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  // AND with the immI_255 constant is emitted as movzbl from src to dst
  // instead of an andl; dst and src are separate operands and no flags
  // operand is declared.
  match(Set dst (AndI src mask));

  format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9894 
 9895 // And Register with Immediate 255 and promote to long
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  // Covers ConvI2L(AndI(src, immI_255)) with a single movzbl into the long
  // destination; no separate conversion instruction is emitted and no flags
  // operand is declared.
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9906 
 9907 // And Register with Immediate 65535
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  // AND with the immI_65535 constant is emitted as movzwl from src to dst
  // instead of an andl; dst and src are separate operands and no flags
  // operand is declared.
  match(Set dst (AndI src mask));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9918 
 9919 // And Register with Immediate 65535 and promote to long
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  // Covers ConvI2L(AndI(src, immI_65535)) with a single movzwl into the long
  // destination; no separate conversion instruction is emitted and no flags
  // operand is declared.
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9930 
 9931 // Can skip int2long conversions after AND with small bitmask
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  // BMI2-only rule for ConvI2L(AndI(src, mask)) where mask is an immI_Pow2M1
  // operand. Emits two instructions and needs a scratch int register (tmp);
  // the flags operand cr is declared killed. ins_cost(125).
  predicate(VM_Version::supports_bmi2());
  ins_cost(125);
  effect(TEMP tmp, KILL cr);
  match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
  ins_encode %{
    // tmp = log2(mask + 1), then bzhiq is given tmp as its index operand.
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9945 
 9946 // And Register with Immediate
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  // Two-operand int AND with an immediate: dst = dst & src, in place.
  // The flags register operand cr is declared killed.
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9958 
 9959 // And Register with Memory
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  // Int AND of a register with a value loaded from memory: the LoadI is
  // folded into andl's memory source operand. dst is updated in place;
  // cr is declared killed. ins_cost(150).
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9972 
 9973 // And Memory with Register
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  // Byte read-modify-write: LoadB from dst, AndI with src, StoreB back to the
  // same dst operand, emitted as one andb on memory. cr is declared killed.
  // ins_cost(150).
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andb    $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9986 
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  // Int read-modify-write: LoadI from dst, AndI with src, StoreI back to the
  // same dst operand, emitted as one andl on memory. cr is declared killed.
  // ins_cost(150).
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9999 
10000 // And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  // Int read-modify-write with an immediate: LoadI from dst, AndI with the
  // constant, StoreI back to the same dst operand, emitted as one andl on
  // memory. cr is declared killed. ins_cost(125).
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10013 
10014 // BMI1 instructions
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  // Matches (src1 ^ minus_1) & LoadI(src2), with minus_1 an immI_M1 operand,
  // and emits a single andnl whose second source is the memory operand.
  // Enabled only with UseBMI1Instructions; cr is declared killed.
  // ins_cost(125).
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10028 
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  // Matches (src1 ^ minus_1) & src2, with minus_1 an immI_M1 operand, and
  // emits a single three-register andnl. Enabled only with
  // UseBMI1Instructions; cr is declared killed.
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10041 
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  // Matches (imm_zero - src) & src, with imm_zero an immI_0 operand and both
  // uses bound to the same src, and emits a single blsil. Enabled only with
  // UseBMI1Instructions; cr is declared killed.
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10054 
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  // Memory-source variant of blsiI_rReg_rReg: both inputs are LoadI nodes
  // bound to the same memory operand src, folded into blsil's memory
  // operand. Enabled only with UseBMI1Instructions; cr is declared killed.
  // ins_cost(125).
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10068 
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  // Matches (LoadI(src) + minus_1) ^ LoadI(src), with both loads bound to the
  // same memory operand and minus_1 an immI_M1 operand, and emits a single
  // blsmskl on memory. Enabled only with UseBMI1Instructions; cr is declared
  // killed. ins_cost(125).
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10083 
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  // Matches (src + minus_1) ^ src, with minus_1 an immI_M1 operand, and emits
  // a single register-to-register blsmskl. Enabled only with
  // UseBMI1Instructions; cr is declared killed.
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
10098 
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  // Matches (src + minus_1) & src, with minus_1 an immI_M1 operand, and emits
  // a single register-to-register blsrl. Enabled only with
  // UseBMI1Instructions; cr is declared killed.
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // Register-source rule: use the same pipeline class as the sibling
  // register-source BMI1 rules (blsiI_rReg_rReg, blsmskI_rReg_rReg).
  ins_pipe(ialu_reg);
%}
10113 
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  // Memory-source variant of blsrI_rReg_rReg: matches
  // (LoadI(src) + minus_1) & LoadI(src) with both loads bound to the same
  // memory operand, folded into blsrl's memory operand. Enabled only with
  // UseBMI1Instructions; cr is declared killed. ins_cost(125).
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // Memory-source rule: use the same pipeline class as the sibling
  // memory-source BMI1 rules (blsiI_rReg_mem, blsmskI_rReg_mem).
  ins_pipe(ialu_reg_mem);
%}
10129 
10130 // Or Instructions
10131 // Or Register with Register
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  // Two-operand int OR: dst = dst | src, with dst as both input and result.
  // The flags register operand cr is declared killed.
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10143 
10144 // Or Register with Immediate
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  // Two-operand int OR with an immediate: dst = dst | src, in place.
  // The flags register operand cr is declared killed.
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10156 
10157 // Or Register with Memory
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  // Int OR of a register with a value loaded from memory: the LoadI is folded
  // into orl's memory source operand. dst is updated in place; cr is declared
  // killed. ins_cost(150).
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10170 
10171 // Or Memory with Register
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  // Byte read-modify-write: LoadB from dst, OrI with src, StoreB back to the
  // same dst operand, emitted as one orb on memory. cr is declared killed.
  // ins_cost(150).
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10184 
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  // Int read-modify-write: LoadI from dst, OrI with src, StoreI back to the
  // same dst operand, emitted as one orl on memory. cr is declared killed.
  // ins_cost(150).
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10197 
10198 // Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  // Int read-modify-write with an immediate: LoadI from dst, OrI with the
  // constant, StoreI back to the same dst operand, emitted as one orl on
  // memory. cr is declared killed. ins_cost(125).
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10211 
10212 // Xor Instructions
10213 // Xor Register with Register
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  // Two-operand int XOR: dst = dst ^ src, with dst as both input and result.
  // The flags register operand cr is declared killed.
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10225 
10226 // Xor Register with Immediate -1
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  // XOR of dst with an immI_M1 operand is emitted as notl on dst, in place.
  // Unlike the other XorI rules, no flags operand is declared here.
  // The format string prints "not"; the assembler call is notl.
  match(Set dst (XorI dst imm));

  format %{ "not    $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10236 
10237 // Xor Register with Immediate
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  // Two-operand int XOR with an immediate: dst = dst ^ src, in place.
  // The flags register operand cr is declared killed.
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10249 
10250 // Xor Register with Memory
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  // Int XOR of a register with a value loaded from memory: the LoadI is
  // folded into xorl's memory source operand. dst is updated in place;
  // cr is declared killed. ins_cost(150).
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10263 
10264 // Xor Memory with Register
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  // Byte read-modify-write: LoadB from dst, XorI with src, StoreB back to the
  // same dst operand, emitted as one xorb on memory. cr is declared killed.
  // ins_cost(150).
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10277 
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  // Int read-modify-write: LoadI from dst, XorI with src, StoreI back to the
  // same dst operand, emitted as one xorl on memory. cr is declared killed.
  // ins_cost(150).
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10290 
10291 // Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  // Int read-modify-write with an immediate: LoadI from dst, XorI with the
  // constant, StoreI back to the same dst operand, emitted as one xorl on
  // memory. cr is declared killed. ins_cost(125).
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10304 
10305 
10306 // Long Logical Instructions
10307 
10308 // And Instructions
10309 // And Register with Register
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  // Two-operand long AND: dst = dst & src, with dst as both input and result.
  // The flags register operand cr is declared killed.
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10321 
10322 // And Register with Immediate 255
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  // Long AND with the immL_255 constant is emitted as movzbl from src to dst
  // instead of an andq; dst and src are separate operands and no flags
  // operand is declared.
  match(Set dst (AndL src mask));

  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // movzbl zeroes out the upper 32-bit and does not need REX.W
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10334 
10335 // And Register with Immediate 65535
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  // Long AND with the immL_65535 constant is emitted as movzwl from src to
  // dst instead of an andq; dst and src are separate operands and no flags
  // operand is declared.
  match(Set dst (AndL src mask));

  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // movzwl zeroes out the upper 32-bit and does not need REX.W
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10347 
10348 // And Register with Immediate
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  // Two-operand long AND with an immediate of operand class immL32:
  // dst = dst & src, in place. The flags register operand cr is declared
  // killed.
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10360 
10361 // And Register with Memory
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  // Long AND of a register with a value loaded from memory: the LoadL is
  // folded into andq's memory source operand. dst is updated in place;
  // cr is declared killed. ins_cost(150).
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10374 
10375 // And Memory with Register
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  // Long read-modify-write: LoadL from dst, AndL with src, StoreL back to the
  // same dst operand, emitted as one andq on memory. cr is declared killed.
  // ins_cost(150).
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10388 
10389 // And Memory with Immediate
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  // Long read-modify-write with an immL32 immediate: LoadL from dst, AndL
  // with the constant, StoreL back to the same dst operand, emitted as one
  // andq on memory. cr is declared killed. ins_cost(125).
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10402 
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  // Long read-modify-write AND with an immL_NotPow2 constant, emitted as a
  // btrq on memory that targets bit log2(~con). cr is declared killed.
  // ins_cost(125).
  //
  // con should be a pure 64-bit immediate given that not(con) is a power of 2
  // because AND/OR works well enough for 8/32-bit values.
  // The predicate reads the AndL constant via n->in(3)->in(2) and accepts the
  // rule only when log2 of its complement is greater than 30.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);

  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
10419 
10420 // BMI1 instructions
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  // Matches (src1 ^ minus_1) & LoadL(src2), with minus_1 an immL_M1 operand,
  // and emits a single andnq whose second source is the memory operand.
  // Enabled only with UseBMI1Instructions; cr is declared killed.
  // ins_cost(125).
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10434 
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  // Matches (src1 ^ minus_1) & src2, with minus_1 an immL_M1 operand, and
  // emits a single three-register andnq. Enabled only with
  // UseBMI1Instructions; cr is declared killed.
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // All operands are registers: use the same pipeline class as the int
  // counterpart andnI_rReg_rReg_rReg rather than the reg-mem class.
  ins_pipe(ialu_reg);
%}
10447 
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  // Matches (imm_zero - src) & src, with imm_zero an immL0 operand and both
  // uses bound to the same src, and emits a single blsiq. Enabled only with
  // UseBMI1Instructions; cr is declared killed.
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10460 
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  // Memory-source variant of blsiL_rReg_rReg: both inputs are LoadL nodes
  // bound to the same memory operand src, folded into blsiq's memory
  // operand. Enabled only with UseBMI1Instructions; cr is declared killed.
  // ins_cost(125).
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10474 
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  // Matches (LoadL(src) + minus_1) ^ LoadL(src), with both loads bound to the
  // same memory operand and minus_1 an immL_M1 operand, and emits a single
  // blsmskq on memory. Enabled only with UseBMI1Instructions; cr is declared
  // killed. ins_cost(125).
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10489 
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  // Matches (src + minus_1) ^ src, with minus_1 an immL_M1 operand, and emits
  // a single register-to-register blsmskq. Enabled only with
  // UseBMI1Instructions; cr is declared killed.
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
10504 
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  // Matches (src + minus_1) & src, with minus_1 an immL_M1 operand, and emits
  // a single register-to-register blsrq. Enabled only with
  // UseBMI1Instructions; cr is declared killed.
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
10519 
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  // Memory-source variant of blsrL_rReg_rReg: matches
  // (LoadL(src) + minus_1) & LoadL(src) with both loads bound to the same
  // memory operand, folded into blsrq's memory operand. Enabled only with
  // UseBMI1Instructions; cr is declared killed. ins_cost(125).
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // Memory-source rule: use the same pipeline class as the sibling
  // memory-source BMI1 rules (blsiL_rReg_mem, blsmskL_rReg_mem).
  ins_pipe(ialu_reg_mem);
%}
10535 
10536 // Or Instructions
10537 // Or Register with Register
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  // Two-operand long OR: dst = dst | src, with dst as both input and result.
  // The flags register operand cr is declared killed.
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10549 
10550 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  // Long OR whose second input is a pointer reinterpreted through CastP2X;
  // the cast is absorbed, so src (operand class any_RegP) is used directly
  // as orq's register source. dst is updated in place; cr is declared killed.
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10561 
10562 
10563 // Or Register with Immediate
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  // Two-operand long OR with an immediate of operand class immL32:
  // dst = dst | src, in place. The flags register operand cr is declared
  // killed.
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10575 
10576 // Or Register with Memory
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  // Long OR of a register with a value loaded from memory: the LoadL is
  // folded into orq's memory source operand. dst is updated in place;
  // cr is declared killed. ins_cost(150).
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10589 
10590 // Or Memory with Register
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  // Long read-modify-write: LoadL from dst, OrL with src, StoreL back to the
  // same dst operand, emitted as one orq on memory. cr is declared killed.
  // ins_cost(150).
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10603 
10604 // Or Memory with Immediate
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  // Long read-modify-write with an immL32 immediate: LoadL from dst, OrL with
  // the constant, StoreL back to the same dst operand, emitted as one orq on
  // memory. cr is declared killed. ins_cost(125).
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10617 
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  // Long read-modify-write OR with an immL_Pow2 constant, emitted as a btsq
  // on memory that targets bit log2(con). cr is declared killed.
  // ins_cost(125).
  //
  // con should be a pure 64-bit power of 2 immediate
  // because AND/OR works well enough for 8/32-bit values.
  // The predicate reads the OrL constant via n->in(3)->in(2) and accepts the
  // rule only when its log2 is greater than 31.
  // NOTE(review): this threshold (> 31) differs from the one in btrL_mem_imm
  // (> 30); confirm the asymmetry is intended.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);

  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
10634 
10635 // Xor Instructions
10636 // Xor Register with Register
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  // Two-operand long XOR: dst = dst ^ src, with dst as both input and result.
  // The flags register operand cr is declared killed.
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10648 
// Long XOR with the immediate -1 (immL_M1) is emitted as a bitwise NOT.
// No flags effect is declared for this rule.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ notq(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
10659 
// Long XOR, register with immL32 immediate:
//   dst = dst ^ src
// The flags register is declared clobbered.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ xorq(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10672 
// Long XOR, register destination, memory source:
//   dst = dst ^ LoadL(src)
// The flags register is declared clobbered.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  src_addr = $src$$Address;
    __ xorq(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
10686 
// Long XOR, memory destination, register source (read-modify-write):
//   StoreL(dst, LoadL(dst) ^ src)
// The flags register is declared clobbered.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Address  dst_addr = $dst$$Address;
    Register src_reg  = $src$$Register;
    __ xorq(dst_addr, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
10700 
// Long XOR, memory destination, immL32 immediate source (read-modify-write):
//   StoreL(dst, LoadL(dst) ^ src)
// The flags register is declared clobbered.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Address dst_addr = $dst$$Address;
    __ xorq(dst_addr, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10714 
// Materialize CmpLTMask(p, q) in dst without a branch:
// compare, set the low byte on "less", zero-extend it, then negate,
// which leaves 0 or -1 in dst.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_encode %{
    Register mask_reg = $dst$$Register;
    __ cmpl($p$$Register, $q$$Register);
    __ setb(Assembler::less, mask_reg);   // low byte = (p < q) ? 1 : 0
    __ movzbl(mask_reg, mask_reg);        // widen the byte to 32 bits
    __ negl(mask_reg);                    // 1 -> -1, 0 stays 0
  %}
  ins_pipe(pipe_slow);
%}
10733 
// CmpLTMask against the constant zero: an arithmetic right shift by 31
// replicates the sign bit of dst across the whole 32-bit register.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ sarl(dst_reg, 31);
  %}
  ins_pipe(ialu_reg);
%}
10746 
// Conditional add built on CmpLTMask:
//   p = ((p < q ? -1 : 0) & y) + (p - q)
// Emitted as a subtract followed by a short branch around the add.
// Better to save a register than avoid a branch.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(300);
  format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
            "jge     done\n\t"
            "addl    $p,$y\n"
            "done:   " %}
  ins_encode %{
    Register p_reg = $p$$Register;
    Register q_reg = $q$$Register;
    Register y_reg = $y$$Register;
    Label skip_add;
    __ subl(p_reg, q_reg);
    // The subtract's flags decide: p >= q means the mask is 0, so skip y.
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl(p_reg, y_reg);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
10769 
// AND with a CmpLTMask result:
//   y = (p < q ? -1 : 0) & y
// Emitted as compare + short branch; y is zeroed only when p >= q.
// Better to save a register than avoid a branch.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
            "jlt     done\n\t"
            "xorl    $y, $y\n"
            "done:   " %}
  ins_encode %{
    Register p_reg = $p$$Register;
    Register q_reg = $q$$Register;
    Register y_reg = $y$$Register;
    Label keep_y;
    __ cmpl(p_reg, q_reg);
    __ jccb(Assembler::less, keep_y);   // mask is all ones: y unchanged
    __ xorl(y_reg, y_reg);              // mask is zero: result is zero
    __ bind(keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
10794 
10795 
10796 //---------- FP Instructions------------------------------------------------
10797 
// Float compare into rFlagsRegU, register with register.
// ucomiss is followed by emit_cmpfp_fixup; per the format text the fixup
// rewrites the saved flags when a NaN was seen. Really expensive (cost 500),
// avoid when the cheaper rFlagsRegUCF variant applies.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10816 
// Float compare into rFlagsRegUCF, register with register.
// A bare ucomiss with no flag fixup afterwards.
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
10827 
// Float compare into rFlagsRegUCF, register with a float loaded from memory.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs      = $src1$$XMMRegister;
    Address     rhs_addr = $src2$$Address;
    __ ucomiss(lhs, rhs_addr);
  %}
  ins_pipe(pipe_slow);
%}
10838 
// Float compare into rFlagsRegUCF, register with a float constant that is
// read from the constant table.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));
  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10848 
// Double compare into rFlagsRegU, register with register.
// ucomisd is followed by emit_cmpfp_fixup; per the format text the fixup
// rewrites the saved flags when a NaN was seen. Really expensive (cost 500),
// avoid when the cheaper rFlagsRegUCF variant applies.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10867 
// Double compare into rFlagsRegUCF, register with register.
// A bare ucomisd with no flag fixup afterwards; the format text mirrors
// the float sibling cmpF_cc_reg_CF.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The stray " test" suffix was dropped: it was not part of the instruction
  // and only polluted the printed listing.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10878 
// Double compare into rFlagsRegUCF, register with a double loaded from memory.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs      = $src1$$XMMRegister;
    Address     rhs_addr = $src2$$Address;
    __ ucomisd(lhs, rhs_addr);
  %}
  ins_pipe(pipe_slow);
%}
10889 
// Double compare into rFlagsRegUCF, register with a double constant that is
// read from the constant table.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10899 
// Three-way float compare (CmpF3), register with register.
// Produces -1, 0 or 1 in dst; emit_cmpfp3 turns the ucomiss flags into the
// integer result (the format text lists the expected sequence).
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10920 
// Three-way float compare (CmpF3), register with a float loaded from memory.
// Produces -1, 0 or 1 in dst; emit_cmpfp3 turns the ucomiss flags into the
// integer result (the format text lists the expected sequence).
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs      = $src1$$XMMRegister;
    Address     rhs_addr = $src2$$Address;
    __ ucomiss(lhs, rhs_addr);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10941 
// Three-way float compare (CmpF3), register with a constant-table float.
// Produces -1, 0 or 1 in dst; emit_cmpfp3 turns the ucomiss flags into the
// integer result (the format text lists the expected sequence).
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10961 
// Three-way double compare (CmpD3), register with register.
// Produces -1, 0 or 1 in dst; emit_cmpfp3 turns the ucomisd flags into the
// integer result (the format text lists the expected sequence).
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10982 
// Three-way double compare (CmpD3), register with a double loaded from memory.
// Produces -1, 0 or 1 in dst; emit_cmpfp3 turns the ucomisd flags into the
// integer result (the format text lists the expected sequence).
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs      = $src1$$XMMRegister;
    Address     rhs_addr = $src2$$Address;
    __ ucomisd(lhs, rhs_addr);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11003 
// Three-way double compare (CmpD3), register with a constant-table double.
// Produces -1, 0 or 1 in dst; emit_cmpfp3 turns the ucomisd flags into the
// integer result (the format text lists the expected sequence).
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11023 
11024 //----------Arithmetic Conversion Instructions---------------------------------
11025 
// Widen float to double (ConvF2D), XMM register source.
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ cvtss2sd(dst_xmm, src_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11036 
// Widen float to double (ConvF2D), with the float load folded into the
// conversion.
instruct convF2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister dst_xmm  = $dst$$XMMRegister;
    Address     src_addr = $src$$Address;
    __ cvtss2sd(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11047 
// Narrow double to float (ConvD2F), XMM register source.
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ cvtsd2ss(dst_xmm, src_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11058 
// Narrow double to float (ConvD2F), with the double load folded into the
// conversion.
instruct convD2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister dst_xmm  = $dst$$XMMRegister;
    Address     src_addr = $src$$Address;
    __ cvtsd2ss(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11069 
// Float to int (ConvF2I), delegated to the convertF2I macro-assembler helper
// with (T_INT, T_FLOAT). The flags register is declared clobbered.
// XXX do mem variants
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
11081 
// Float to long (ConvF2L), delegated to the convertF2I macro-assembler helper
// with (T_LONG, T_FLOAT). The flags register is declared clobbered.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
11092 
// Double to int (ConvD2I), delegated to the convertF2I macro-assembler helper
// with (T_INT, T_DOUBLE). The flags register is declared clobbered.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
11103 
// Double to long (ConvD2L), delegated to the convertF2I macro-assembler helper
// with (T_LONG, T_DOUBLE). The flags register is declared clobbered.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
11114 
// RoundD: round a double to a long via the round_double macro-assembler
// helper. dst, rtmp and rcx are all declared TEMP, and flags are killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP" %}
  ins_encode %{
    Register    result  = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    Register    scratch = $rtmp$$Register;
    Register    rcx_reg = $rcx$$Register;
    __ round_double(result, src_xmm, scratch, rcx_reg);
  %}
  ins_pipe(pipe_slow);
%}
11125 
// RoundF: round a float to an int via the round_float macro-assembler
// helper. dst, rtmp and rcx are all declared TEMP, and flags are killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    Register    result  = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    Register    scratch = $rtmp$$Register;
    Register    rcx_reg = $rcx$$Register;
    __ round_float(result, src_xmm, scratch, rcx_reg);
  %}
  ins_pipe(pipe_slow);
%}
11136 
// Int to float (ConvI2F) using cvtsi2ssl from a general register.
// Selected only when UseXmmI2F is off; convXI2F_reg covers the other case.
instruct convI2F_reg_reg(regF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_reg = $src$$Register;
    __ cvtsi2ssl(dst_xmm, src_reg);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11148 
// Int to float (ConvI2F) with the int load folded into cvtsi2ssl.
instruct convI2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister dst_xmm  = $dst$$XMMRegister;
    Address     src_addr = $src$$Address;
    __ cvtsi2ssl(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11159 
// Int to double (ConvI2D) using cvtsi2sdl from a general register.
// Selected only when UseXmmI2D is off; convXI2D_reg covers the other case.
instruct convI2D_reg_reg(regD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_reg = $src$$Register;
    __ cvtsi2sdl(dst_xmm, src_reg);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11171 
// Int to double (ConvI2D) with the int load folded into cvtsi2sdl.
instruct convI2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister dst_xmm  = $dst$$XMMRegister;
    Address     src_addr = $src$$Address;
    __ cvtsi2sdl(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11182 
// Int to float (ConvI2F) for the UseXmmI2F case: move the int into the XMM
// register first, then convert in place with cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdl(dst_xmm, $src$$Register);
    __ cvtdq2ps(dst_xmm, dst_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11196 
// Int to double (ConvI2D) for the UseXmmI2D case: move the int into the XMM
// register first, then convert in place with cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdl(dst_xmm, $src$$Register);
    __ cvtdq2pd(dst_xmm, dst_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11210 
// Long to float (ConvL2F) using cvtsi2ssq from a general register.
instruct convL2F_reg_reg(regF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_reg = $src$$Register;
    __ cvtsi2ssq(dst_xmm, src_reg);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11221 
// Long to float (ConvL2F) with the long load folded into cvtsi2ssq.
instruct convL2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister dst_xmm  = $dst$$XMMRegister;
    Address     src_addr = $src$$Address;
    __ cvtsi2ssq(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11232 
// Long to double (ConvL2D) using cvtsi2sdq from a general register.
instruct convL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_reg = $src$$Register;
    __ cvtsi2sdq(dst_xmm, src_reg);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11243 
// Long to double (ConvL2D) with the long load folded into cvtsi2sdq.
instruct convL2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister dst_xmm  = $dst$$XMMRegister;
    Address     src_addr = $src$$Address;
    __ cvtsi2sdq(dst_xmm, src_addr);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11254 
// Sign-extending int to long (ConvI2L) via movslq.
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ movslq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11266 
11267 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11268 // %{
11269 //   match(Set dst (ConvI2L src));
11270 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11271 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11272 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11273 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11274 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11275 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11276 
11277 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11278 //   ins_encode(enc_copy(dst, src));
11279 // //   opcode(0x63); // needs REX.W
11280 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11281 //   ins_pipe(ialu_reg_reg);
11282 // %}
11283 
// Zero-extend convert int to long.
// Matches ConvI2L followed by an AndL with the immL_32bits mask and
// replaces the pair with a single 32-bit register move.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  // Single-line listing: the dangling "\n\t" that used to end this string
  // was dropped so the output matches zerox_long_reg_reg.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    // Nothing is emitted when the allocator assigned dst and src the same
    // register.
    // NOTE(review): skipping the move relies on src's upper 32 bits already
    // being zero in that case -- confirm against the int register invariant.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
11297 
// Zero-extend convert int to long, with the int load folded in.
// Matches LoadI + ConvI2L followed by an AndL with the immL_32bits mask
// and replaces them with a single 32-bit load.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  // Single-line listing: the dangling "\n\t" that used to end this string
  // was dropped so the output matches zerox_long_reg_reg.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
11309 
// Zero-extend a long: AndL with the immL_32bits mask becomes a 32-bit
// register move.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ movl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11320 
// Long to int (ConvL2I) via a 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ movl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
11331 
11332 
// MoveF2I, float stack slot -> int register: a 32-bit load from
// rsp + slot displacement.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
11344 
// MoveI2F, int stack slot -> float register: a movflt load from
// rsp + slot displacement.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
11356 
// MoveD2L, double stack slot -> long register: a 64-bit load from
// rsp + slot displacement.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
11368 
// MoveL2D, long stack slot -> double register, for the case where
// UseXmmLoadAndClearUpper is off. The encoding is the same movdbl call as
// in MoveL2D_stack_reg; only the predicate and the printed mnemonic differ.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
11381 
// MoveL2D, long stack slot -> double register, for the case where
// UseXmmLoadAndClearUpper is on. Loads through movdbl from
// rsp + slot displacement.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
11394 
11395 
// MoveF2I, float register -> int stack slot: a movflt store to
// rsp + slot displacement.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11407 
// MoveI2F, int register -> float stack slot: a 32-bit store to
// rsp + slot displacement.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11419 
// MoveD2L, double register -> long stack slot: a movdbl store to
// rsp + slot displacement.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The listing tag now names this rule; it previously read
  // "MoveL2D_reg_stack", which is the name of the opposite-direction rule.
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11431 
// MoveL2D, long register -> double stack slot: a 64-bit store to
// rsp + slot displacement.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11443 
// MoveF2I, float XMM register -> int general register, via movdl.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ movdl(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
11454 
// MoveD2L, double XMM register -> long general register, via movdq.
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ movdq(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
11465 
// MoveI2F, int general register -> float XMM register, via movdl.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_reg = $src$$Register;
    __ movdl(dst_xmm, src_reg);
  %}
  ins_pipe(pipe_slow);
%}
11476 
// MoveL2D, long general register -> double XMM register, via movdq.
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_reg = $src$$Register;
    __ movdq(dst_xmm, src_reg);
  %}
  ins_pipe(pipe_slow);
%}
11487 
11488 
11489 // Fast clearing of an array
// Small ClearArray non-AVX512.
// Applies when the ClearArray node is neither large nor word-copy-only and
// UseAVX <= 2. cnt (rcx), base (rdi) and val (rax) are consumed and
// clobbered; tmp is an XMM scratch register. All code generation is done
// by MacroAssembler::clear_mem; the format template below is only the
// printed description of the expected sequence.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    val_reg  = $val$$Register;
    XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // The two trailing flags are presumably is_large / word_copy_only,
    // mirroring the predicate -- confirm against clear_mem's declaration.
    __ clear_mem(base_reg, cnt_reg, val_reg, xmm_tmp, false, false);
  %}
  ins_pipe(pipe_slow);
%}
11549 
// Small ClearArray non-AVX512, word-copy-only flavour.
// Applies when the ClearArray node is not large, is word-copy-only, and
// UseAVX <= 2. cnt (rcx), base (rdi) and val (rax) are consumed and
// clobbered; tmp is an XMM scratch register. All code generation is done
// by MacroAssembler::clear_mem; the format template below is only the
// printed description of the expected sequence (it has no stosb branch).
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    val_reg  = $val$$Register;
    XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // The two trailing flags are presumably is_large / word_copy_only,
    // mirroring the predicate -- confirm against clear_mem's declaration.
    __ clear_mem(base_reg, cnt_reg, val_reg, xmm_tmp, false, true);
  %}
  ins_pipe(pipe_slow);
%}
11605 
// Small ClearArray AVX512 non-constant length.
// Applies when the ClearArray node is neither large nor word-copy-only and
// UseAVX > 2. Besides the XMM scratch register this variant also reserves
// an opmask register (ktmp) and hands it to clear_mem. cnt (rcx), base
// (rdi) and val (rax) are consumed and clobbered.
// NOTE(review): the template still prints "xorq rax, rax" although val is
// an input operand here and the non-AVX512 siblings print no such line --
// confirm whether that first line is stale.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    val_reg  = $val$$Register;
    XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    KRegister   k_tmp    = $ktmp$$KRegister;
    // The two boolean flags are presumably is_large / word_copy_only,
    // mirroring the predicate -- confirm against clear_mem's declaration.
    __ clear_mem(base_reg, cnt_reg, val_reg, xmm_tmp, false, false, k_tmp);
  %}
  ins_pipe(pipe_slow);
%}
11666 
// rep_stos_evex_word_copy: same shape as rep_stos_evex, but selected when the
// ClearArray node reports word_copy_only() (still not is_large(), UseAVX > 2).
// Operands and effects are identical: cnt/base/val (rcx_RegL / rdi_RegP /
// rax_RegL) are USE_KILL, tmp and ktmp are TEMP, cr is killed.
// The encoding calls clear_mem with the boolean pair (false, true), i.e. the
// second flag follows the word_copy_only() half of the predicate.
// NOTE(review): the format text is the same listing as the non-word-copy rule
// (including "xorq rax, rax" and a "rep stosb" branch); whether it reflects
// the code emitted for the word-copy case cannot be told from here -- confirm.
11667 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11668                                  Universe dummy, rFlagsReg cr)
11669 %{
11670   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11671   match(Set dummy (ClearArray (Binary cnt base) val));
11672   ins_cost(125);
11673   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11674 
11675   format %{ $$template
11676     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11677     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11678     $$emit$$"jg      LARGE\n\t"
11679     $$emit$$"dec     rcx\n\t"
11680     $$emit$$"js      DONE\t# Zero length\n\t"
11681     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11682     $$emit$$"dec     rcx\n\t"
11683     $$emit$$"jge     LOOP\n\t"
11684     $$emit$$"jmp     DONE\n\t"
11685     $$emit$$"# LARGE:\n\t"
11686     if (UseFastStosb) {
11687        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11688        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11689     } else if (UseXMMForObjInit) {
11690        $$emit$$"mov     rdi,rax\n\t"
11691        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11692        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11693        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11694        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11695        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11696        $$emit$$"add     0x40,rax\n\t"
11697        $$emit$$"# L_zero_64_bytes:\n\t"
11698        $$emit$$"sub     0x8,rcx\n\t"
11699        $$emit$$"jge     L_loop\n\t"
11700        $$emit$$"add     0x4,rcx\n\t"
11701        $$emit$$"jl      L_tail\n\t"
11702        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11703        $$emit$$"add     0x20,rax\n\t"
11704        $$emit$$"sub     0x4,rcx\n\t"
11705        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11706        $$emit$$"add     0x4,rcx\n\t"
11707        $$emit$$"jle     L_end\n\t"
11708        $$emit$$"dec     rcx\n\t"
11709        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11710        $$emit$$"vmovq   xmm0,(rax)\n\t"
11711        $$emit$$"add     0x8,rax\n\t"
11712        $$emit$$"dec     rcx\n\t"
11713        $$emit$$"jge     L_sloop\n\t"
11714        $$emit$$"# L_end:\n\t"
11715     } else {
11716        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11717     }
11718     $$emit$$"# DONE"
11719   %}
11720   ins_encode %{
11721     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11722                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
11723   %}
11724   ins_pipe(pipe_slow);
11725 %}
11726 
11727 // Large ClearArray non-AVX512.
// rep_stos_large: matches ClearArray (Binary cnt base) val when the node is
// is_large(), not word_copy_only(), and UseAVX <= 2 (see predicate).
// Operands: cnt (rcx_RegL), base (rdi_RegP) and val (rax_RegL) are USE_KILL;
// tmp (regD) is TEMP; cr is killed. No mask register is needed by this rule.
// The format listing has three alternatives chosen by UseFastStosb /
// UseXMMForObjInit; unlike the small-array rules it has no short-length
// prologue. The encoding calls clear_mem with the boolean pair (true, false).
11728 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11729                         Universe dummy, rFlagsReg cr)
11730 %{
11731   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11732   match(Set dummy (ClearArray (Binary cnt base) val));
11733   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11734 
11735   format %{ $$template
11736     if (UseFastStosb) {
11737        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11738        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11739     } else if (UseXMMForObjInit) {
11740        $$emit$$"movdq   $tmp, $val\n\t"
11741        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11742        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11743        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11744        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11745        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11746        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11747        $$emit$$"add     0x40,rax\n\t"
11748        $$emit$$"# L_zero_64_bytes:\n\t"
11749        $$emit$$"sub     0x8,rcx\n\t"
11750        $$emit$$"jge     L_loop\n\t"
11751        $$emit$$"add     0x4,rcx\n\t"
11752        $$emit$$"jl      L_tail\n\t"
11753        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11754        $$emit$$"add     0x20,rax\n\t"
11755        $$emit$$"sub     0x4,rcx\n\t"
11756        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11757        $$emit$$"add     0x4,rcx\n\t"
11758        $$emit$$"jle     L_end\n\t"
11759        $$emit$$"dec     rcx\n\t"
11760        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11761        $$emit$$"vmovq   xmm0,(rax)\n\t"
11762        $$emit$$"add     0x8,rax\n\t"
11763        $$emit$$"dec     rcx\n\t"
11764        $$emit$$"jge     L_sloop\n\t"
11765        $$emit$$"# L_end:\n\t"
11766     } else {
11767        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11768     }
11769   %}
11770   ins_encode %{
11771     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11772                  $tmp$$XMMRegister, true, false);
11773   %}
11774   ins_pipe(pipe_slow);
11775 %}
11776 
// rep_stos_large_word_copy: ClearArray rule for nodes that are is_large() and
// word_copy_only() when UseAVX <= 2 (see predicate).
// Operands and effects match rep_stos_large: cnt/base/val (rcx_RegL /
// rdi_RegP / rax_RegL) are USE_KILL, tmp (regD) is TEMP, cr is killed.
// The format listing offers only the UseXMMForObjInit sequence or "rep stosq";
// there is no UseFastStosb ("rep stosb") alternative in this rule.
// The encoding calls clear_mem with the boolean pair (true, true).
11777 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11778                                   Universe dummy, rFlagsReg cr)
11779 %{
11780   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11781   match(Set dummy (ClearArray (Binary cnt base) val));
11782   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11783 
11784   format %{ $$template
11785     if (UseXMMForObjInit) {
11786        $$emit$$"movdq   $tmp, $val\n\t"
11787        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11788        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11789        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11790        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11791        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11792        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11793        $$emit$$"add     0x40,rax\n\t"
11794        $$emit$$"# L_zero_64_bytes:\n\t"
11795        $$emit$$"sub     0x8,rcx\n\t"
11796        $$emit$$"jge     L_loop\n\t"
11797        $$emit$$"add     0x4,rcx\n\t"
11798        $$emit$$"jl      L_tail\n\t"
11799        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11800        $$emit$$"add     0x20,rax\n\t"
11801        $$emit$$"sub     0x4,rcx\n\t"
11802        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11803        $$emit$$"add     0x4,rcx\n\t"
11804        $$emit$$"jle     L_end\n\t"
11805        $$emit$$"dec     rcx\n\t"
11806        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11807        $$emit$$"vmovq   xmm0,(rax)\n\t"
11808        $$emit$$"add     0x8,rax\n\t"
11809        $$emit$$"dec     rcx\n\t"
11810        $$emit$$"jge     L_sloop\n\t"
11811        $$emit$$"# L_end:\n\t"
11812     } else {
11813        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11814     }
11815   %}
11816   ins_encode %{
11817     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11818                  $tmp$$XMMRegister, true, true);
11819   %}
11820   ins_pipe(pipe_slow);
11821 %}
11822 
11823 // Large ClearArray AVX512.
// rep_stos_large_evex: matches ClearArray (Binary cnt base) val when the node
// is is_large(), not word_copy_only(), and UseAVX > 2 (see predicate).
// Operands: cnt (rcx_RegL), base (rdi_RegP) and val (rax_RegL) are USE_KILL;
// tmp (legRegD) and ktmp (kReg) are TEMP; cr is killed.
// The encoding calls clear_mem with the boolean pair (true, false) and also
// supplies the mask register ktmp.
// NOTE(review): the format text prints "xorq rax, rax" / "vpxor ymm0", while
// the UseAVX <= 2 counterpart rep_stos_large prints "movdq $tmp, $val"; val is
// passed to clear_mem here too, so this listing may be out of date -- confirm.
11824 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11825                              Universe dummy, rFlagsReg cr)
11826 %{
11827   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11828   match(Set dummy (ClearArray (Binary cnt base) val));
11829   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11830 
11831   format %{ $$template
11832     if (UseFastStosb) {
11833        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11834        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11835        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11836     } else if (UseXMMForObjInit) {
11837        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11838        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11839        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11840        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11841        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11842        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11843        $$emit$$"add     0x40,rax\n\t"
11844        $$emit$$"# L_zero_64_bytes:\n\t"
11845        $$emit$$"sub     0x8,rcx\n\t"
11846        $$emit$$"jge     L_loop\n\t"
11847        $$emit$$"add     0x4,rcx\n\t"
11848        $$emit$$"jl      L_tail\n\t"
11849        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11850        $$emit$$"add     0x20,rax\n\t"
11851        $$emit$$"sub     0x4,rcx\n\t"
11852        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11853        $$emit$$"add     0x4,rcx\n\t"
11854        $$emit$$"jle     L_end\n\t"
11855        $$emit$$"dec     rcx\n\t"
11856        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11857        $$emit$$"vmovq   xmm0,(rax)\n\t"
11858        $$emit$$"add     0x8,rax\n\t"
11859        $$emit$$"dec     rcx\n\t"
11860        $$emit$$"jge     L_sloop\n\t"
11861        $$emit$$"# L_end:\n\t"
11862     } else {
11863        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11864        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11865     }
11866   %}
11867   ins_encode %{
11868     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11869                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
11870   %}
11871   ins_pipe(pipe_slow);
11872 %}
11873 
// rep_stos_large_evex_word_copy: ClearArray rule for nodes that are is_large()
// and word_copy_only() when UseAVX > 2 (see predicate).
// Operands and effects match rep_stos_large_evex: cnt/base/val (rcx_RegL /
// rdi_RegP / rax_RegL) are USE_KILL, tmp (legRegD) and ktmp (kReg) are TEMP,
// cr is killed.
// The encoding calls clear_mem with the boolean pair (true, true) plus ktmp.
// NOTE(review): the format text still lists a UseFastStosb "rep stosb" branch,
// which the UseAVX <= 2 rule rep_stos_large_word_copy omits; the listing may
// not reflect the code emitted for the word-copy case -- confirm.
11874 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11875                                        Universe dummy, rFlagsReg cr)
11876 %{
11877   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11878   match(Set dummy (ClearArray (Binary cnt base) val));
11879   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11880 
11881   format %{ $$template
11882     if (UseFastStosb) {
11883        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11884        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11885        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11886     } else if (UseXMMForObjInit) {
11887        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11888        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11889        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11890        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11891        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11892        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11893        $$emit$$"add     0x40,rax\n\t"
11894        $$emit$$"# L_zero_64_bytes:\n\t"
11895        $$emit$$"sub     0x8,rcx\n\t"
11896        $$emit$$"jge     L_loop\n\t"
11897        $$emit$$"add     0x4,rcx\n\t"
11898        $$emit$$"jl      L_tail\n\t"
11899        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11900        $$emit$$"add     0x20,rax\n\t"
11901        $$emit$$"sub     0x4,rcx\n\t"
11902        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11903        $$emit$$"add     0x4,rcx\n\t"
11904        $$emit$$"jle     L_end\n\t"
11905        $$emit$$"dec     rcx\n\t"
11906        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11907        $$emit$$"vmovq   xmm0,(rax)\n\t"
11908        $$emit$$"add     0x8,rax\n\t"
11909        $$emit$$"dec     rcx\n\t"
11910        $$emit$$"jge     L_sloop\n\t"
11911        $$emit$$"# L_end:\n\t"
11912     } else {
11913        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11914        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11915     }
11916   %}
11917   ins_encode %{
11918     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11919                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
11920   %}
11921   ins_pipe(pipe_slow);
11922 %}
11923 
11924 // Small ClearArray AVX512 constant length.
// rep_stos_im: ClearArray rule for a compile-time-constant count (cnt is an
// immL operand). Selected only when the node is neither is_large() nor
// word_copy_only(), UseAVX > 2, and VM_Version::supports_avx512vlbw() holds.
// Its ins_cost of 100 is lower than the 125 declared by rep_stos_evex.
// base is a general rRegP and is not listed in the effect clause; val
// (rax_RegL) is USE_KILL, tmp (regD) and ktmp (kReg) are TEMP, cr is killed.
// The encoding uses the clear_mem form that takes the count as a constant.
11925 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
11926 %{
11927   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
11928             ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11929   match(Set dummy (ClearArray (Binary cnt base) val));
11930   ins_cost(100);
11931   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
11932   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11933   ins_encode %{
11934     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11935   %}
11936   ins_pipe(pipe_slow);
11937 %}
11938 
// string_compareL: StrComp rule for the StrIntrinsicNode::LL encoding, used
// when VM_Version::supports_avx512vlbw() is false.
// Operands: str1 (rdi_RegP), cnt1 (rcx_RegI), str2 (rsi_RegP) and cnt2
// (rdx_RegI) are inputs that are clobbered (USE_KILL); result is rax_RegI;
// tmp1 (legRegD) is TEMP; cr is killed.
// The encoding calls string_compare with knoreg in the mask-register slot.
11939 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11940                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11941 %{
11942   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11943   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11944   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11945 
11946   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11947   ins_encode %{
11948     __ string_compare($str1$$Register, $str2$$Register,
11949                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11950                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11951   %}
11952   ins_pipe( pipe_slow );
11953 %}
11954 
// string_compareL_evex: StrComp rule for the StrIntrinsicNode::LL encoding,
// used when VM_Version::supports_avx512vlbw() is true.
// Same operands as string_compareL plus a kReg scratch (ktmp, TEMP) that is
// passed to string_compare in place of knoreg.
11955 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11956                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
11957 %{
11958   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11959   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11960   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11961 
11962   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11963   ins_encode %{
11964     __ string_compare($str1$$Register, $str2$$Register,
11965                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11966                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11967   %}
11968   ins_pipe( pipe_slow );
11969 %}
11970 
// string_compareU: StrComp rule for the StrIntrinsicNode::UU encoding, used
// when VM_Version::supports_avx512vlbw() is false.
// Operands: str1 (rdi_RegP), cnt1 (rcx_RegI), str2 (rsi_RegP) and cnt2
// (rdx_RegI) are USE_KILL; result is rax_RegI; tmp1 (legRegD) is TEMP; cr is
// killed. The encoding calls string_compare with knoreg as the mask register.
11971 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11972                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11973 %{
11974   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11975   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11976   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11977 
11978   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11979   ins_encode %{
11980     __ string_compare($str1$$Register, $str2$$Register,
11981                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11982                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11983   %}
11984   ins_pipe( pipe_slow );
11985 %}
11986 
// string_compareU_evex: StrComp rule for the StrIntrinsicNode::UU encoding,
// used when VM_Version::supports_avx512vlbw() is true.
// Same operands as string_compareU plus a kReg scratch (ktmp, TEMP) that is
// passed to string_compare in place of knoreg.
11987 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11988                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
11989 %{
11990   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11991   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11992   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11993 
11994   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11995   ins_encode %{
11996     __ string_compare($str1$$Register, $str2$$Register,
11997                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11998                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11999   %}
12000   ins_pipe( pipe_slow );
12001 %}
12002 
// string_compareLU: StrComp rule for the StrIntrinsicNode::LU encoding, used
// when VM_Version::supports_avx512vlbw() is false.
// Operands: str1 (rdi_RegP), cnt1 (rcx_RegI), str2 (rsi_RegP) and cnt2
// (rdx_RegI) are USE_KILL; result is rax_RegI; tmp1 (legRegD) is TEMP; cr is
// killed. Arguments reach string_compare in declaration order (str1 first),
// with knoreg as the mask register.
12003 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12004                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
12005 %{
12006   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
12007   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12008   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12009 
12010   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12011   ins_encode %{
12012     __ string_compare($str1$$Register, $str2$$Register,
12013                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12014                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
12015   %}
12016   ins_pipe( pipe_slow );
12017 %}
12018 
// string_compareLU_evex: StrComp rule for the StrIntrinsicNode::LU encoding,
// used when VM_Version::supports_avx512vlbw() is true.
// Same operands as string_compareLU plus a kReg scratch (ktmp, TEMP) that is
// passed to string_compare in place of knoreg.
12019 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
12020                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
12021 %{
12022   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
12023   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12024   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12025 
12026   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12027   ins_encode %{
12028     __ string_compare($str1$$Register, $str2$$Register,
12029                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12030                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
12031   %}
12032   ins_pipe( pipe_slow );
12033 %}
12034 
// string_compareUL: StrComp rule for the StrIntrinsicNode::UL encoding, used
// when VM_Version::supports_avx512vlbw() is false.
// Unlike the LL/UU/LU rules, the operand classes are exchanged here: str1 is
// rsi_RegP and cnt1 is rdx_RegI, while str2 is rdi_RegP and cnt2 is rcx_RegI.
// All four are USE_KILL; result is rax_RegI; tmp1 (legRegD) is TEMP; cr is
// killed.
12035 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
12036                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
12037 %{
12038   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
12039   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12040   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12041 
12042   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12043   ins_encode %{
12044     // The second string and its count are passed first for this encoding.
12045     __ string_compare($str2$$Register, $str1$$Register,
12046                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
12047                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
12048   %}
12049   ins_pipe( pipe_slow );
12050 %}
12050 
// string_compareUL_evex: StrComp rule for the StrIntrinsicNode::UL encoding,
// used when VM_Version::supports_avx512vlbw() is true.
// As in string_compareUL, str1/cnt1 use rsi_RegP/rdx_RegI and str2/cnt2 use
// rdi_RegP/rcx_RegI; a kReg scratch (ktmp, TEMP) replaces knoreg in the call.
12051 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
12052                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
12053 %{
12054   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
12055   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12056   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12057 
12058   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12059   ins_encode %{
12060     // The second string and its count are passed first for this encoding.
12061     __ string_compare($str2$$Register, $str1$$Register,
12062                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
12063                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
12064   %}
12065   ins_pipe( pipe_slow );
12066 %}
12066 
12067 // fast search of substring with known size.
// string_indexof_conL: StrIndexOf rule for the StrIntrinsicNode::LL encoding
// where the substring length is an immediate (int_cnt2 is immI). Requires
// UseSSE42Intrinsics.
// str1 (rdi_RegP), str2 (rsi_RegP) and cnt1 (rdx_RegI) are USE_KILL; result is
// rbx_RegI; tmp_vec (legRegD) is TEMP. cnt2 (rax_RegI) and tmp (rcx_RegI) do
// not appear in the match expression: they are declared KILL and handed to
// the helper as registers. cr is killed.
// The constant length picks the helper: string_indexofC8 for >= 16 elements,
// string_indexof otherwise.
12068 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
12069                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
12070 %{
12071   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12072   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12073   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12074 
12075   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
12076   ins_encode %{
12077     int icnt2 = (int)$int_cnt2$$constant;
12078     if (icnt2 >= 16) {
12079       // IndexOf for constant substrings with size >= 16 elements
12080       // which don't need to be loaded through stack.
12081       __ string_indexofC8($str1$$Register, $str2$$Register,
12082                           $cnt1$$Register, $cnt2$$Register,
12083                           icnt2, $result$$Register,
12084                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12085     } else {
12086       // Small strings are loaded through stack if they cross page boundary.
12087       __ string_indexof($str1$$Register, $str2$$Register,
12088                         $cnt1$$Register, $cnt2$$Register,
12089                         icnt2, $result$$Register,
12090                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12091     }
12092   %}
12093   ins_pipe( pipe_slow );
12094 %}
12095 
12096 // fast search of substring with known size.
// string_indexof_conU: StrIndexOf rule for the StrIntrinsicNode::UU encoding
// where the substring length is an immediate (int_cnt2 is immI). Requires
// UseSSE42Intrinsics.
// str1 (rdi_RegP), str2 (rsi_RegP) and cnt1 (rdx_RegI) are USE_KILL; result is
// rbx_RegI; tmp_vec (legRegD) is TEMP. cnt2 (rax_RegI) and tmp (rcx_RegI) are
// not part of the match expression; they are declared KILL and handed to the
// helper as registers. cr is killed.
// The constant length picks the helper: string_indexofC8 for >= 8 elements
// (the LL rule uses 16), string_indexof otherwise.
12097 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
12098                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
12099 %{
12100   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12101   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12102   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12103 
12104   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
12105   ins_encode %{
12106     int icnt2 = (int)$int_cnt2$$constant;
12107     if (icnt2 >= 8) {
12108       // IndexOf for constant substrings with size >= 8 elements
12109       // which don't need to be loaded through stack.
12110       __ string_indexofC8($str1$$Register, $str2$$Register,
12111                           $cnt1$$Register, $cnt2$$Register,
12112                           icnt2, $result$$Register,
12113                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12114     } else {
12115       // Small strings are loaded through stack if they cross page boundary.
12116       __ string_indexof($str1$$Register, $str2$$Register,
12117                         $cnt1$$Register, $cnt2$$Register,
12118                         icnt2, $result$$Register,
12119                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12120     }
12121   %}
12122   ins_pipe( pipe_slow );
12123 %}
12124 
12125 // fast search of substring with known size.
// string_indexof_conUL: StrIndexOf rule for the StrIntrinsicNode::UL encoding
// where the substring length is an immediate (int_cnt2 is immI). Requires
// UseSSE42Intrinsics.
// str1 (rdi_RegP), str2 (rsi_RegP) and cnt1 (rdx_RegI) are USE_KILL; result is
// rbx_RegI; tmp_vec (legRegD) is TEMP. cnt2 (rax_RegI) and tmp (rcx_RegI) are
// not part of the match expression; they are declared KILL and handed to the
// helper as registers. cr is killed.
// The constant length picks the helper: string_indexofC8 for >= 8 elements,
// string_indexof otherwise.
12126 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
12127                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
12128 %{
12129   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12130   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12131   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12132 
12133   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
12134   ins_encode %{
12135     int icnt2 = (int)$int_cnt2$$constant;
12136     if (icnt2 >= 8) {
12137       // IndexOf for constant substrings with size >= 8 elements
12138       // which don't need to be loaded through stack.
12139       __ string_indexofC8($str1$$Register, $str2$$Register,
12140                           $cnt1$$Register, $cnt2$$Register,
12141                           icnt2, $result$$Register,
12142                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12143     } else {
12144       // Small strings are loaded through stack if they cross page boundary.
12145       __ string_indexof($str1$$Register, $str2$$Register,
12146                         $cnt1$$Register, $cnt2$$Register,
12147                         icnt2, $result$$Register,
12148                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12149     }
12150   %}
12151   ins_pipe( pipe_slow );
12152 %}
12153 
// string_indexofL: StrIndexOf rule for the StrIntrinsicNode::LL encoding with
// the substring length held in a register (cnt2 is rax_RegI). Requires
// UseSSE42Intrinsics.
// str1 (rdi_RegP), str2 (rsi_RegP), cnt1 (rdx_RegI) and cnt2 are USE_KILL;
// result is rbx_RegI; tmp_vec (legRegD) is TEMP; tmp (rcx_RegI) and cr are
// killed.
12154 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
12155                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
12156 %{
12157   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12158   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12159   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12160 
12161   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12162   ins_encode %{
12163     // (-1) fills the argument slot where the constant-length rules pass
12164     // icnt2 -- presumably "length not known at compile time"; confirm
12165     // against string_indexof.
12166     __ string_indexof($str1$$Register, $str2$$Register,
12167                       $cnt1$$Register, $cnt2$$Register,
12168                       (-1), $result$$Register,
12169                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12170   %}
12171   ins_pipe( pipe_slow );
12172 %}
12170 
// string_indexofU: StrIndexOf rule for the StrIntrinsicNode::UU encoding with
// the substring length held in a register (cnt2 is rax_RegI). Requires
// UseSSE42Intrinsics.
// str1 (rdi_RegP), str2 (rsi_RegP), cnt1 (rdx_RegI) and cnt2 are USE_KILL;
// result is rbx_RegI; tmp_vec (legRegD) is TEMP; tmp (rcx_RegI) and cr are
// killed.
12171 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
12172                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
12173 %{
12174   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12175   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12176   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12177 
12178   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12179   ins_encode %{
12180     // (-1) fills the argument slot where the constant-length rules pass
12181     // icnt2 -- presumably "length not known at compile time"; confirm
12182     // against string_indexof.
12183     __ string_indexof($str1$$Register, $str2$$Register,
12184                       $cnt1$$Register, $cnt2$$Register,
12185                       (-1), $result$$Register,
12186                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12187   %}
12188   ins_pipe( pipe_slow );
12189 %}
12187 
// string_indexofUL: StrIndexOf rule for the StrIntrinsicNode::UL encoding with
// the substring length held in a register (cnt2 is rax_RegI). Requires
// UseSSE42Intrinsics.
// str1 (rdi_RegP), str2 (rsi_RegP), cnt1 (rdx_RegI) and cnt2 are USE_KILL;
// result is rbx_RegI; tmp_vec (legRegD) is TEMP; tmp (rcx_RegI) and cr are
// killed.
12188 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
12189                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
12190 %{
12191   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12192   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12193   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12194 
12195   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12196   ins_encode %{
12197     // (-1) fills the argument slot where the constant-length rules pass
12198     // icnt2 -- presumably "length not known at compile time"; confirm
12199     // against string_indexof.
12200     __ string_indexof($str1$$Register, $str2$$Register,
12201                       $cnt1$$Register, $cnt2$$Register,
12202                       (-1), $result$$Register,
12203                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12204   %}
12205   ins_pipe( pipe_slow );
12206 %}
12204 
// string_indexof_char: StrIndexOfChar rule for the StrIntrinsicNode::U
// encoding; requires UseSSE42Intrinsics.
// str1 (rdi_RegP), cnt1 (rdx_RegI) and ch (rax_RegI) are USE_KILL; result is
// rbx_RegI; three legRegD vectors and tmp (rcx_RegI) are TEMP; cr is killed.
// The encoding is a single call to string_indexof_char.
12205 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
12206                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
12207 %{
12208   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12209   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12210   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12211   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12212   ins_encode %{
12213     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12214                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
12215   %}
12216   ins_pipe( pipe_slow );
12217 %}
12218 
// stringL_indexof_char: StrIndexOfChar rule for the StrIntrinsicNode::L
// encoding; requires UseSSE42Intrinsics.
// Operand list and effects are the same as string_indexof_char: str1
// (rdi_RegP), cnt1 (rdx_RegI) and ch (rax_RegI) are USE_KILL; result is
// rbx_RegI; three legRegD vectors and tmp (rcx_RegI) are TEMP; cr is killed.
// The encoding calls stringL_indexof_char instead of string_indexof_char.
12219 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
12220                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
12221 %{
12222   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12223   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12224   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12225   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12226   ins_encode %{
12227     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12228                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
12229   %}
12230   ins_pipe( pipe_slow );
12231 %}
12232 
12233 // fast string equals
// string_equals: StrEquals rule used when VM_Version::supports_avx512vlbw()
// is false.
// str1 (rdi_RegP), str2 (rsi_RegP) and cnt (rcx_RegI) are USE_KILL; result is
// rax_RegI; tmp1/tmp2 (legRegD) are TEMP; tmp3 (rbx_RegI) and cr are killed.
// The encoding calls arrays_equals with a leading `false` (the AryEq rule
// array_equalsB passes `true` in that slot), the explicit count register, and
// knoreg as the mask register.
12234 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
12235                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
12236 %{
12237   predicate(!VM_Version::supports_avx512vlbw());
12238   match(Set result (StrEquals (Binary str1 str2) cnt));
12239   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12240 
12241   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
12242   ins_encode %{
12243     __ arrays_equals(false, $str1$$Register, $str2$$Register,
12244                      $cnt$$Register, $result$$Register, $tmp3$$Register,
12245                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12246   %}
12247   ins_pipe( pipe_slow );
12248 %}
12249 
// string_equals_evex: StrEquals rule used when
// VM_Version::supports_avx512vlbw() is true.
// Same operands as string_equals plus a kReg scratch (ktmp, TEMP) that is
// passed to arrays_equals in place of knoreg.
12250 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
12251                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
12252 %{
12253   predicate(VM_Version::supports_avx512vlbw());
12254   match(Set result (StrEquals (Binary str1 str2) cnt));
12255   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12256 
12257   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
12258   ins_encode %{
12259     __ arrays_equals(false, $str1$$Register, $str2$$Register,
12260                      $cnt$$Register, $result$$Register, $tmp3$$Register,
12261                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12262   %}
12263   ins_pipe( pipe_slow );
12264 %}
12265 
12266 // fast array equals
// Matches AryEq whose node encoding() is StrIntrinsicNode::LL, when
// VM_Version::supports_avx512vlbw() is false.
// ary1/ary2 are USE_KILL; tmp3 (rcx) and tmp4 (rbx) are KILL scratch registers
// passed to arrays_equals; the call uses `true` as its first argument,
// `false` for the /* char */ argument, and knoreg as the last argument.
12267 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
12268                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
12269 %{
12270   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12271   match(Set result (AryEq ary1 ary2));
12272   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12273 
12274   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12275   ins_encode %{
12276     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12277                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12278                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12279   %}
12280   ins_pipe( pipe_slow );
12281 %}
12282 
// Matches AryEq whose node encoding() is StrIntrinsicNode::LL, when
// VM_Version::supports_avx512vlbw() is true.
// Adds a TEMP kReg (ktmp) that is passed as the last argument of
// arrays_equals; the /* char */ argument is `false`.
12283 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
12284                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
12285 %{
12286   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12287   match(Set result (AryEq ary1 ary2));
12288   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12289 
12290   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12291   ins_encode %{
12292     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12293                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12294                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12295   %}
12296   ins_pipe( pipe_slow );
12297 %}
12298 
// Matches AryEq whose node encoding() is StrIntrinsicNode::UU, when
// VM_Version::supports_avx512vlbw() is false.
// Identical operand layout to the byte[] rule, but the /* char */ argument of
// arrays_equals is `true`; knoreg is passed as the last argument.
12299 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
12300                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
12301 %{
12302   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12303   match(Set result (AryEq ary1 ary2));
12304   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12305 
12306   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12307   ins_encode %{
12308     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12309                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12310                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12311   %}
12312   ins_pipe( pipe_slow );
12313 %}
12314 
// Matches AryEq whose node encoding() is StrIntrinsicNode::UU, when
// VM_Version::supports_avx512vlbw() is true.
// The /* char */ argument of arrays_equals is `true` and the TEMP kReg ktmp
// is passed as the last argument.
12315 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
12316                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
12317 %{
12318   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12319   match(Set result (AryEq ary1 ary2));
12320   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12321 
12322   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12323   ins_encode %{
12324     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12325                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12326                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12327   %}
12328   ins_pipe( pipe_slow );
12329 %}
12330 
// Matches VectorizedHashCode when UseAVX >= 2.
// `result` appears both as the Set destination and inside the match tree
// (Binary result basic_type), so the same rbx-pinned operand is input and output.
// basic_type is an immU8 constant, cast to BasicType for the arrays_hashcode call.
// Thirteen vector temporaries and three integer temporaries are declared TEMP;
// ary1 and cnt1 are USE_KILL and the flags are KILL.
12331 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
12332                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
12333                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
12334                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
12335                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
12336 %{
12337   predicate(UseAVX >= 2);
12338   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
12339   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
12340          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
12341          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
12342          USE basic_type, KILL cr);
12343 
12344   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
12345   ins_encode %{
12346     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
12347                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12348                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
12349                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
12350                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
12351                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
12352                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
12353   %}
12354   ins_pipe( pipe_slow );
12355 %}
12356 
// Matches CountPositives when AVX-512 VL+BW or BMI2 is unavailable
// (predicate: !supports_avx512vlbw() || !supports_bmi2()).
// ary1 (rsi) and len (rcx) are USE_KILL, tmp3 (rbx) and cr are KILL,
// tmp1/tmp2 are vector TEMPs. knoreg is passed for both trailing
// arguments of count_positives.
12357 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
12358                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
12359 %{
12360   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12361   match(Set result (CountPositives ary1 len));
12362   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12363 
12364   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12365   ins_encode %{
12366     __ count_positives($ary1$$Register, $len$$Register,
12367                        $result$$Register, $tmp3$$Register,
12368                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12369   %}
12370   ins_pipe( pipe_slow );
12371 %}
12372 
// Matches CountPositives when both AVX-512 VL+BW and BMI2 are available
// (predicate: supports_avx512vlbw() && supports_bmi2()).
// In addition to the vector temporaries, two TEMP kReg operands
// (ktmp1, ktmp2) are passed as the trailing arguments of count_positives.
12373 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
12374                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
12375 %{
12376   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12377   match(Set result (CountPositives ary1 len));
12378   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12379 
12380   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12381   ins_encode %{
12382     __ count_positives($ary1$$Register, $len$$Register,
12383                        $result$$Register, $tmp3$$Register,
12384                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12385   %}
12386   ins_pipe( pipe_slow );
12387 %}
12388 
12389 // fast char[] to byte[] compression
// Matches StrCompressedCopy when AVX-512 VL+BW or BMI2 is unavailable.
// src (rsi), dst (rdi) and len (rdx) are USE_KILL; tmp5 (rcx) and cr are KILL;
// tmp1..tmp4 are vector TEMPs; result is pinned to rax.
// knoreg is passed for both trailing arguments of char_array_compress.
12390 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
12391                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
12392   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12393   match(Set result (StrCompressedCopy src (Binary dst len)));
12394   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
12395          USE_KILL len, KILL tmp5, KILL cr);
12396 
12397   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12398   ins_encode %{
12399     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12400                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12401                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12402                            knoreg, knoreg);
12403   %}
12404   ins_pipe( pipe_slow );
12405 %}
12406 
// Matches StrCompressedCopy when both AVX-512 VL+BW and BMI2 are available.
// Same register pinning as the non-EVEX rule, plus two TEMP kReg operands
// (ktmp1, ktmp2) passed as the trailing arguments of char_array_compress.
12407 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
12408                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
12409   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12410   match(Set result (StrCompressedCopy src (Binary dst len)));
12411   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
12412          USE_KILL len, KILL tmp5, KILL cr);
12413 
12414   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12415   ins_encode %{
12416     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12417                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12418                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12419                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12420   %}
12421   ins_pipe( pipe_slow );
12422 %}
12423 // fast byte[] to char[] inflation
// Matches StrInflatedCopy when AVX-512 VL+BW or BMI2 is unavailable.
// The Set destination is a `Universe dummy` operand; it is not passed to the
// encoding. src/dst/len are USE_KILL, tmp1 (vector) and tmp2 (rcx) are TEMP,
// and knoreg is passed as the last argument of byte_array_inflate.
12424 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12425                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
12426   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12427   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12428   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12429 
12430   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12431   ins_encode %{
12432     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12433                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12434   %}
12435   ins_pipe( pipe_slow );
12436 %}
12437 
// Matches StrInflatedCopy when both AVX-512 VL+BW and BMI2 are available.
// The Set destination is a `Universe dummy` operand; it is not passed to the
// encoding. A TEMP kReg (ktmp) is passed as the last argument of byte_array_inflate.
12438 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12439                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
12440   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12441   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12442   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12443 
12444   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12445   ins_encode %{
12446     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12447                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12448   %}
12449   ins_pipe( pipe_slow );
12450 %}
12451 
12452 // encode char[] to byte[] in ISO_8859_1
// Matches EncodeISOArray when the node's is_ascii() is false.
// src/dst/len are USE_KILL, tmp5 (rcx) and cr are KILL, tmp1..tmp4 are vector
// TEMPs; result is pinned to rax. The final argument of encode_iso_array is `false`.
12453 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12454                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
12455                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
12456   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12457   match(Set result (EncodeISOArray src (Binary dst len)));
12458   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12459 
12460   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
12461   ins_encode %{
12462     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12463                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12464                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12465   %}
12466   ins_pipe( pipe_slow );
12467 %}
12468 
12469 // encode char[] to byte[] in ASCII
// Matches EncodeISOArray when the node's is_ascii() is true.
// Uses the same encode_iso_array routine and operand layout as the ISO rule,
// but passes `true` as the final argument.
12470 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12471                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
12472                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
12473   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12474   match(Set result (EncodeISOArray src (Binary dst len)));
12475   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12476 
12477   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
12478   ins_encode %{
12479     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12480                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12481                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12482   %}
12483   ins_pipe( pipe_slow );
12484 %}
12485 
12486 //----------Overflow Math Instructions-----------------------------------------
12487 
// Produces flags for OverflowAddI (register, register) by emitting addl.
// op1 is pinned to rax (rax_RegI) and declared USE_KILL because the add
// writes its first operand; op2 is only read.
12488 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
12489 %{
12490   match(Set cr (OverflowAddI op1 op2));
12491   effect(DEF cr, USE_KILL op1, USE op2);
12492 
12493   format %{ "addl    $op1, $op2\t# overflow check int" %}
12494 
12495   ins_encode %{
12496     __ addl($op1$$Register, $op2$$Register);
12497   %}
12498   ins_pipe(ialu_reg_reg);
12499 %}
12500 
// Produces flags for OverflowAddI (register, immI constant) by emitting addl
// with an immediate. op1 is rax-pinned and USE_KILL.
12501 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
12502 %{
12503   match(Set cr (OverflowAddI op1 op2));
12504   effect(DEF cr, USE_KILL op1, USE op2);
12505 
12506   format %{ "addl    $op1, $op2\t# overflow check int" %}
12507 
12508   ins_encode %{
12509     __ addl($op1$$Register, $op2$$constant);
12510   %}
12511   ins_pipe(ialu_reg_reg);
12512 %}
12513 
// Produces flags for OverflowAddL (register, register) by emitting addq.
// op1 is pinned to rax (rax_RegL) and declared USE_KILL; op2 is only read.
12514 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
12515 %{
12516   match(Set cr (OverflowAddL op1 op2));
12517   effect(DEF cr, USE_KILL op1, USE op2);
12518 
12519   format %{ "addq    $op1, $op2\t# overflow check long" %}
12520   ins_encode %{
12521     __ addq($op1$$Register, $op2$$Register);
12522   %}
12523   ins_pipe(ialu_reg_reg);
12524 %}
12525 
// Produces flags for OverflowAddL (register, immL32 constant) by emitting
// addq with an immediate. op1 is rax-pinned and USE_KILL.
12526 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
12527 %{
12528   match(Set cr (OverflowAddL op1 op2));
12529   effect(DEF cr, USE_KILL op1, USE op2);
12530 
12531   format %{ "addq    $op1, $op2\t# overflow check long" %}
12532   ins_encode %{
12533     __ addq($op1$$Register, $op2$$constant);
12534   %}
12535   ins_pipe(ialu_reg_reg);
12536 %}
12537 
// Produces flags for OverflowSubI (register, register) by emitting cmpl.
// No effect() clause is declared: the compare only sets flags, so neither
// operand needs to be pinned or killed.
12538 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
12539 %{
12540   match(Set cr (OverflowSubI op1 op2));
12541 
12542   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
12543   ins_encode %{
12544     __ cmpl($op1$$Register, $op2$$Register);
12545   %}
12546   ins_pipe(ialu_reg_reg);
12547 %}
12548 
// Produces flags for OverflowSubI (register, immI constant) by emitting cmpl
// with an immediate. No effect() clause is declared.
12549 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
12550 %{
12551   match(Set cr (OverflowSubI op1 op2));
12552 
12553   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
12554   ins_encode %{
12555     __ cmpl($op1$$Register, $op2$$constant);
12556   %}
12557   ins_pipe(ialu_reg_reg);
12558 %}
12559 
// Produces flags for OverflowSubL (register, register) by emitting cmpq.
// No effect() clause is declared.
12560 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12561 %{
12562   match(Set cr (OverflowSubL op1 op2));
12563 
12564   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
12565   ins_encode %{
12566     __ cmpq($op1$$Register, $op2$$Register);
12567   %}
12568   ins_pipe(ialu_reg_reg);
12569 %}
12570 
// Produces flags for OverflowSubL (register, immL32 constant) by emitting
// cmpq with an immediate. No effect() clause is declared.
12571 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12572 %{
12573   match(Set cr (OverflowSubL op1 op2));
12574 
12575   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
12576   ins_encode %{
12577     __ cmpq($op1$$Register, $op2$$constant);
12578   %}
12579   ins_pipe(ialu_reg_reg);
12580 %}
12581 
// Special case of OverflowSubI whose first input is the immI_0 constant:
// (0 - op2) is emitted as a single negl on op2. op2 is rax-pinned and
// USE_KILL because negl rewrites it in place.
12582 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
12583 %{
12584   match(Set cr (OverflowSubI zero op2));
12585   effect(DEF cr, USE_KILL op2);
12586 
12587   format %{ "negl    $op2\t# overflow check int" %}
12588   ins_encode %{
12589     __ negl($op2$$Register);
12590   %}
12591   ins_pipe(ialu_reg_reg);
12592 %}
12593 
// Special case of OverflowSubL whose first input is the immL0 constant:
// (0 - op2) is emitted as a single negq on op2. op2 is rax-pinned and
// USE_KILL because negq rewrites it in place.
12594 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
12595 %{
12596   match(Set cr (OverflowSubL zero op2));
12597   effect(DEF cr, USE_KILL op2);
12598 
12599   format %{ "negq    $op2\t# overflow check long" %}
12600   ins_encode %{
12601     __ negq($op2$$Register);
12602   %}
12603   ins_pipe(ialu_reg_reg);
12604 %}
12605 
// Produces flags for OverflowMulI (register, register) by emitting the
// two-operand imull. op1 is rax-pinned and USE_KILL (it receives the product);
// op2 is only read.
12606 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
12607 %{
12608   match(Set cr (OverflowMulI op1 op2));
12609   effect(DEF cr, USE_KILL op1, USE op2);
12610 
12611   format %{ "imull    $op1, $op2\t# overflow check int" %}
12612   ins_encode %{
12613     __ imull($op1$$Register, $op2$$Register);
12614   %}
12615   ins_pipe(ialu_reg_reg_alu0);
12616 %}
12617 
// Produces flags for OverflowMulI (register, immI constant) using the
// three-operand imull: the product goes to the TEMP register tmp, so op1 is
// only a USE and is not pinned to rax.
12618 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
12619 %{
12620   match(Set cr (OverflowMulI op1 op2));
12621   effect(DEF cr, TEMP tmp, USE op1, USE op2);
12622 
12623   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
12624   ins_encode %{
12625     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
12626   %}
12627   ins_pipe(ialu_reg_reg_alu0);
12628 %}
12629 
// Produces flags for OverflowMulL (register, register) by emitting the
// two-operand imulq. op1 is rax-pinned and USE_KILL; op2 is only read.
12630 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
12631 %{
12632   match(Set cr (OverflowMulL op1 op2));
12633   effect(DEF cr, USE_KILL op1, USE op2);
12634 
12635   format %{ "imulq    $op1, $op2\t# overflow check long" %}
12636   ins_encode %{
12637     __ imulq($op1$$Register, $op2$$Register);
12638   %}
12639   ins_pipe(ialu_reg_reg_alu0);
12640 %}
12641 
// Produces flags for OverflowMulL (register, immL32 constant) using the
// three-operand imulq: the product goes to the TEMP register tmp, so op1 is
// only a USE and is not pinned to rax.
12642 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
12643 %{
12644   match(Set cr (OverflowMulL op1 op2));
12645   effect(DEF cr, TEMP tmp, USE op1, USE op2);
12646 
12647   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
12648   ins_encode %{
12649     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
12650   %}
12651   ins_pipe(ialu_reg_reg_alu0);
12652 %}
12653 
12654 
12655 //----------Control Flow Instructions------------------------------------------
12656 // Signed compare Instructions
12657 
12658 // XXX more variants!!
// Signed int compare, register vs register: CmpI -> cmpl.
// Unlike the other compare rules here, this one spells out its effects
// explicitly (DEF cr, USE op1, USE op2).
12659 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
12660 %{
12661   match(Set cr (CmpI op1 op2));
12662   effect(DEF cr, USE op1, USE op2);
12663 
12664   format %{ "cmpl    $op1, $op2" %}
12665   ins_encode %{
12666     __ cmpl($op1$$Register, $op2$$Register);
12667   %}
12668   ins_pipe(ialu_cr_reg_reg);
12669 %}
12670 
// Signed int compare, register vs immI constant: CmpI -> cmpl reg, imm.
12671 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
12672 %{
12673   match(Set cr (CmpI op1 op2));
12674 
12675   format %{ "cmpl    $op1, $op2" %}
12676   ins_encode %{
12677     __ cmpl($op1$$Register, $op2$$constant);
12678   %}
12679   ins_pipe(ialu_cr_reg_imm);
12680 %}
12681 
// Signed int compare, register vs memory: folds the LoadI of op2 into the
// cmpl by using the memory operand's address directly. Carries an explicit
// ins_cost of 500 (marked XXX by the original author).
12682 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
12683 %{
12684   match(Set cr (CmpI op1 (LoadI op2)));
12685 
12686   ins_cost(500); // XXX
12687   format %{ "cmpl    $op1, $op2" %}
12688   ins_encode %{
12689     __ cmpl($op1$$Register, $op2$$Address);
12690   %}
12691   ins_pipe(ialu_cr_reg_mem);
12692 %}
12693 
// Int compare against the immI_0 constant: emitted as testl src, src
// rather than a cmpl with an immediate.
12694 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
12695 %{
12696   match(Set cr (CmpI src zero));
12697 
12698   format %{ "testl   $src, $src" %}
12699   ins_encode %{
12700     __ testl($src$$Register, $src$$Register);
12701   %}
12702   ins_pipe(ialu_cr_reg_imm);
12703 %}
12704 
// Folds (AndI src con) compared with zero into a single testl src, imm,
// so the AND result is never materialized in a register.
12705 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
12706 %{
12707   match(Set cr (CmpI (AndI src con) zero));
12708 
12709   format %{ "testl   $src, $con" %}
12710   ins_encode %{
12711     __ testl($src$$Register, $con$$constant);
12712   %}
12713   ins_pipe(ialu_cr_reg_imm);
12714 %}
12715 
// Folds (AndI src1 src2) compared with zero into a single testl src1, src2.
12716 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
12717 %{
12718   match(Set cr (CmpI (AndI src1 src2) zero));
12719 
12720   format %{ "testl   $src1, $src2" %}
12721   ins_encode %{
12722     __ testl($src1$$Register, $src2$$Register);
12723   %}
12724   ins_pipe(ialu_cr_reg_imm);
12725 %}
12726 
// Folds (AndI src (LoadI mem)) compared with zero into a single
// testl src, [mem], using the memory operand's address directly.
12727 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
12728 %{
12729   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
12730 
12731   format %{ "testl   $src, $mem" %}
12732   ins_encode %{
12733     __ testl($src$$Register, $mem$$Address);
12734   %}
12735   ins_pipe(ialu_cr_reg_mem);
12736 %}
12737 
12738 // Unsigned compare Instructions; really, same as signed except they
12739 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare, register vs register: CmpU -> cmpl.
// The emitted instruction is the same as for CmpI; the difference is that
// the result operand is an rFlagsRegU.
12740 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
12741 %{
12742   match(Set cr (CmpU op1 op2));
12743 
12744   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12745   ins_encode %{
12746     __ cmpl($op1$$Register, $op2$$Register);
12747   %}
12748   ins_pipe(ialu_cr_reg_reg);
12749 %}
12750 
// Unsigned int compare, register vs immI constant: CmpU -> cmpl reg, imm,
// producing an rFlagsRegU.
12751 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
12752 %{
12753   match(Set cr (CmpU op1 op2));
12754 
12755   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12756   ins_encode %{
12757     __ cmpl($op1$$Register, $op2$$constant);
12758   %}
12759   ins_pipe(ialu_cr_reg_imm);
12760 %}
12761 
// Unsigned int compare, register vs memory: folds the LoadI of op2 into the
// cmpl. Carries an explicit ins_cost of 500 (marked XXX by the original author).
12762 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
12763 %{
12764   match(Set cr (CmpU op1 (LoadI op2)));
12765 
12766   ins_cost(500); // XXX
12767   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12768   ins_encode %{
12769     __ cmpl($op1$$Register, $op2$$Address);
12770   %}
12771   ins_pipe(ialu_cr_reg_mem);
12772 %}
12773 
12774 // // // Cisc-spilled version of cmpU_rReg
12775 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
12776 // //%{
12777 // //  match(Set cr (CmpU (LoadI op1) op2));
12778 // //
12779 // //  format %{ "CMPu   $op1,$op2" %}
12780 // //  ins_cost(500);
12781 // //  opcode(0x39);  /* Opcode 39 /r */
12782 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12783 // //%}
12784 
// Unsigned int compare against the immI_0 constant: emitted as
// testl src, src, producing an rFlagsRegU.
12785 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
12786 %{
12787   match(Set cr (CmpU src zero));
12788 
12789   format %{ "testl   $src, $src\t# unsigned" %}
12790   ins_encode %{
12791     __ testl($src$$Register, $src$$Register);
12792   %}
12793   ins_pipe(ialu_cr_reg_imm);
12794 %}
12795 
// Pointer compare, register vs register: CmpP -> cmpq, producing an
// rFlagsRegU (unsigned flags).
12796 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
12797 %{
12798   match(Set cr (CmpP op1 op2));
12799 
12800   format %{ "cmpq    $op1, $op2\t# ptr" %}
12801   ins_encode %{
12802     __ cmpq($op1$$Register, $op2$$Register);
12803   %}
12804   ins_pipe(ialu_cr_reg_reg);
12805 %}
12806 
// Pointer compare, register vs memory: folds the LoadP of op2 into the cmpq.
// Only applies when the load (n->in(2)) has barrier_data() == 0.
// Carries an explicit ins_cost of 500 (marked XXX by the original author).
12807 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
12808 %{
12809   match(Set cr (CmpP op1 (LoadP op2)));
12810   predicate(n->in(2)->as_Load()->barrier_data() == 0);
12811 
12812   ins_cost(500); // XXX
12813   format %{ "cmpq    $op1, $op2\t# ptr" %}
12814   ins_encode %{
12815     __ cmpq($op1$$Register, $op2$$Address);
12816   %}
12817   ins_pipe(ialu_cr_reg_mem);
12818 %}
12819 
12820 // // // Cisc-spilled version of cmpP_rReg
12821 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12822 // //%{
12823 // //  match(Set cr (CmpP (LoadP op1) op2));
12824 // //
12825 // //  format %{ "CMPu   $op1,$op2" %}
12826 // //  ins_cost(500);
12827 // //  opcode(0x39);  /* Opcode 39 /r */
12828 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12829 // //%}
12830 
12831 // XXX this is generalized by compP_rReg_mem???
12832 // Compare raw pointer (used in out-of-heap check).
12833 // Only works because non-oop pointers must be raw pointers
12834 // and raw pointers have no anti-dependencies.
// Raw-pointer variant of the register-vs-memory pointer compare.
// Same match tree and cmpq encoding as compP_rReg_mem, but additionally
// requires the loaded value's bottom_type()->reloc() to be relocInfo::none,
// and declares no ins_cost.
12835 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
12836 %{
12837   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
12838             n->in(2)->as_Load()->barrier_data() == 0);
12839   match(Set cr (CmpP op1 (LoadP op2)));
12840 
12841   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
12842   ins_encode %{
12843     __ cmpq($op1$$Register, $op2$$Address);
12844   %}
12845   ins_pipe(ialu_cr_reg_mem);
12846 %}
12847 
12848 // This will generate a signed flags result. This should be OK since
12849 // any compare to a zero should be eq/neq.
// Pointer compare against the immP0 (null) constant: emitted as
// testq src, src. Note the result operand is rFlagsReg, not rFlagsRegU.
12850 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12851 %{
12852   match(Set cr (CmpP src zero));
12853 
12854   format %{ "testq   $src, $src\t# ptr" %}
12855   ins_encode %{
12856     __ testq($src$$Register, $src$$Register);
12857   %}
12858   ins_pipe(ialu_cr_reg_imm);
12859 %}
12860 
12861 // This will generate a signed flags result. This should be OK since
12862 // any compare to a zero should be eq/neq.
// Null check of a pointer loaded from memory, folded into one testq on the
// memory operand. Applies when compressed oops are off or the compressed-oops
// base is non-null, and the load has barrier_data() == 0.
// NOTE(review): the encoding passes the 32-bit immediate 0xFFFFFFFF while the
// format string prints a 64-bit all-ones mask; presumably the immediate is
// sign-extended by testq - confirm against Assembler::testq(Address, int32_t).
12863 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12864 %{
12865   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
12866             n->in(1)->as_Load()->barrier_data() == 0);
12867   match(Set cr (CmpP (LoadP op) zero));
12868 
12869   ins_cost(500); // XXX
12870   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
12871   ins_encode %{
12872     __ testq($op$$Address, 0xFFFFFFFF);
12873   %}
12874   ins_pipe(ialu_cr_reg_imm);
12875 %}
12876 
// Null check of a pointer loaded from memory when compressed oops are on and
// the compressed-oops base is null: compares r12 against the memory operand.
// The format string documents the assumption that r12 (heap base) holds zero
// in this configuration. Requires barrier_data() == 0 on the load.
12877 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12878 %{
12879   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
12880             n->in(1)->as_Load()->barrier_data() == 0);
12881   match(Set cr (CmpP (LoadP mem) zero));
12882 
12883   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
12884   ins_encode %{
12885     __ cmpq(r12, $mem$$Address);
12886   %}
12887   ins_pipe(ialu_cr_reg_mem);
12888 %}
12889 
// Compressed-pointer compare, register vs register: CmpN -> 32-bit cmpl,
// producing an rFlagsRegU.
12890 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12891 %{
12892   match(Set cr (CmpN op1 op2));
12893 
12894   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12895   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12896   ins_pipe(ialu_cr_reg_reg);
12897 %}
12898 
// Compressed-pointer compare, register vs memory: folds the LoadN of mem
// into a 32-bit cmpl against the memory operand's address.
12899 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12900 %{
12901   match(Set cr (CmpN src (LoadN mem)));
12902 
12903   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
12904   ins_encode %{
12905     __ cmpl($src$$Register, $mem$$Address);
12906   %}
12907   ins_pipe(ialu_cr_reg_mem);
12908 %}
12909 
// Compressed-pointer compare, register vs immN constant. The constant is
// cast to jobject and handed to cmp_narrow_oop, which emits the compare.
12910 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
12911   match(Set cr (CmpN op1 op2));
12912 
12913   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12914   ins_encode %{
12915     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
12916   %}
12917   ins_pipe(ialu_cr_reg_imm);
12918 %}
12919 
// Compressed-pointer compare, immN constant vs memory. The match tree has
// the constant as the first CmpN input, while the emitted cmp_narrow_oop
// takes the memory address first and the constant (cast to jobject) second.
12920 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
12921 %{
12922   match(Set cr (CmpN src (LoadN mem)));
12923 
12924   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
12925   ins_encode %{
12926     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12927   %}
12928   ins_pipe(ialu_cr_reg_mem);
12929 %}
12930 
// Compressed-klass compare, register vs immNKlass constant. The constant is
// cast to Klass* and handed to cmp_narrow_klass, which emits the compare.
12931 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
12932   match(Set cr (CmpN op1 op2));
12933 
12934   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
12935   ins_encode %{
12936     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
12937   %}
12938   ins_pipe(ialu_cr_reg_imm);
12939 %}
12940 
// Compressed-klass compare, immNKlass constant vs a LoadNKlass from memory.
// The match tree has the constant first, while the emitted cmp_narrow_klass
// takes the memory address first and the constant (cast to Klass*) second.
12941 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
12942 %{
12943   match(Set cr (CmpN src (LoadNKlass mem)));
12944 
12945   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
12946   ins_encode %{
12947     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
12948   %}
12949   ins_pipe(ialu_cr_reg_mem);
12950 %}
12951 
// Compressed-pointer compare against the immN0 (null) constant: emitted as
// testl src, src. The result operand is rFlagsReg, not rFlagsRegU.
12952 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
12953   match(Set cr (CmpN src zero));
12954 
12955   format %{ "testl   $src, $src\t# compressed ptr" %}
12956   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12957   ins_pipe(ialu_cr_reg_imm);
12958 %}
12959 
// Null check of a compressed pointer loaded from memory, used when the
// compressed-oops base is non-null.
// The field is tested against an all-ones 32-bit mask so that ZF is set
// exactly when the loaded narrow oop is zero, which is what the matched
// (CmpN (LoadN mem) zero) requires. A cmpl against 0xFFFFFFFF would instead
// set ZF when the field equals 0xFFFFFFFF, so testl must be used here, as the
// format string already states.
12960 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12961 %{
12962   predicate(CompressedOops::base() != nullptr);
12963   match(Set cr (CmpN (LoadN mem) zero));
12964 
12965   ins_cost(500); // XXX
12966   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
12967   ins_encode %{
12968     __ testl($mem$$Address, (int)0xFFFFFFFF);
12969   %}
12970   ins_pipe(ialu_cr_reg_mem);
12971 %}
12972 
// Null check of a compressed pointer loaded from memory when the
// compressed-oops base is null: compares r12 against the memory operand with
// a 32-bit cmpl. The format string documents the assumption that r12
// (heap base) holds zero in this configuration.
12973 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12974 %{
12975   predicate(CompressedOops::base() == nullptr);
12976   match(Set cr (CmpN (LoadN mem) zero));
12977 
12978   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12979   ins_encode %{
12980     __ cmpl(r12, $mem$$Address);
12981   %}
12982   ins_pipe(ialu_cr_reg_mem);
12983 %}
12984 
12985 // Yanked all unsigned pointer compare operations.
12986 // Pointer compares are done with CmpP which is already unsigned.
12987 
// Signed long compare, register vs register: CmpL -> cmpq.
12988 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12989 %{
12990   match(Set cr (CmpL op1 op2));
12991 
12992   format %{ "cmpq    $op1, $op2" %}
12993   ins_encode %{
12994     __ cmpq($op1$$Register, $op2$$Register);
12995   %}
12996   ins_pipe(ialu_cr_reg_reg);
12997 %}
12998 
// Signed long compare, register vs immL32 constant: CmpL -> cmpq reg, imm.
// The operand class restricts the constant to immL32.
12999 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
13000 %{
13001   match(Set cr (CmpL op1 op2));
13002 
13003   format %{ "cmpq    $op1, $op2" %}
13004   ins_encode %{
13005     __ cmpq($op1$$Register, $op2$$constant);
13006   %}
13007   ins_pipe(ialu_cr_reg_imm);
13008 %}
13009 
// Signed long compare, register vs memory: folds the LoadL of op2 into the
// cmpq. Unlike the int and pointer memory compares, no ins_cost is declared.
13010 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
13011 %{
13012   match(Set cr (CmpL op1 (LoadL op2)));
13013 
13014   format %{ "cmpq    $op1, $op2" %}
13015   ins_encode %{
13016     __ cmpq($op1$$Register, $op2$$Address);
13017   %}
13018   ins_pipe(ialu_cr_reg_mem);
13019 %}
13020 
// Long compare against the immL0 constant: emitted as testq src, src.
13021 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
13022 %{
13023   match(Set cr (CmpL src zero));
13024 
13025   format %{ "testq   $src, $src" %}
13026   ins_encode %{
13027     __ testq($src$$Register, $src$$Register);
13028   %}
13029   ins_pipe(ialu_cr_reg_imm);
13030 %}
13031 
// Folds (AndL src con) compared with zero into a single testq src, imm.
// The mask is restricted to the immL32 operand class.
13032 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
13033 %{
13034   match(Set cr (CmpL (AndL src con) zero));
13035 
13036   format %{ "testq   $src, $con\t# long" %}
13037   ins_encode %{
13038     __ testq($src$$Register, $con$$constant);
13039   %}
13040   ins_pipe(ialu_cr_reg_imm);
13041 %}
13042 
// Folds (AndL src1 src2) compared with zero into a single testq src1, src2.
13043 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
13044 %{
13045   match(Set cr (CmpL (AndL src1 src2) zero));
13046 
13047   format %{ "testq   $src1, $src2\t# long" %}
13048   ins_encode %{
13049     __ testq($src1$$Register, $src2$$Register);
13050   %}
13051   ins_pipe(ialu_cr_reg_imm);
13052 %}
13053 
// Folds (AndL src (LoadL mem)) compared with zero into a single
// testq src, [mem], using the memory operand's address directly.
13054 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
13055 %{
13056   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
13057 
13058   format %{ "testq   $src, $mem" %}
13059   ins_encode %{
13060     __ testq($src$$Register, $mem$$Address);
13061   %}
13062   ins_pipe(ialu_cr_reg_mem);
13063 %}
13064 
// Variant of the long test-against-memory rule in which the register input
// is a pointer viewed as a long via CastP2X. The cast is absorbed by the
// match: src is an rRegP and is used directly as the testq register operand.
13065 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
13066 %{
13067   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
13068 
13069   format %{ "testq   $src, $mem" %}
13070   ins_encode %{
13071     __ testq($src$$Register, $mem$$Address);
13072   %}
13073   ins_pipe(ialu_cr_reg_mem);
13074 %}
13075 
13076 // Manifest a CmpU result in an integer register.  Very painful.
13077 // This is the test to avoid.
// Materializes the three-way result of an unsigned int compare (CmpU3) in dst.
// Sequence: cmpl sets the flags; dst is preloaded with -1; if src1 is below
// src2 (unsigned) the code jumps to done with dst == -1. Otherwise setb on
// notZero writes 0 (equal) or 1 (not equal) into the low byte and movzbl
// zero-extends it to the full register. The flags register is KILLed.
13078 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
13079 %{
13080   match(Set dst (CmpU3 src1 src2));
13081   effect(KILL flags);
13082 
13083   ins_cost(275); // XXX
13084   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
13085             "movl    $dst, -1\n\t"
13086             "jb,u    done\n\t"
13087             "setne   $dst\n\t"
13088             "movzbl  $dst, $dst\n\t"
13089     "done:" %}
13090   ins_encode %{
13091     Label done;
13092     __ cmpl($src1$$Register, $src2$$Register);
13093     __ movl($dst$$Register, -1);
13094     __ jccb(Assembler::below, done);
13095     __ setb(Assembler::notZero, $dst$$Register);
13096     __ movzbl($dst$$Register, $dst$$Register);
13097     __ bind(done);
13098   %}
13099   ins_pipe(pipe_slow);
13100 %}
13101 
// Three-way signed long compare materialized in an int register.  Costly;
// avoid matching this when possible.
// dst is preloaded with -1 and kept when src1 is less than src2; otherwise
// setne/movzbl turn the not-equal flag into 0 or 1.  Flags are killed.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Label finished;
    __ cmpq($src1$$Register, $src2$$Register);
    // Preload the less-than answer; the branch below still tests the cmpq flags.
    __ movl(result, -1);
    __ jccb(Assembler::less, finished);
    __ setb(Assembler::notZero, result);
    __ movzbl(result, result);
    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
13127 
// Manifest a CmpUL result in an integer register.  Very painful.
// This is the test to avoid.
// dst is preloaded with -1 and left that way when src1 is below src2
// (unsigned); otherwise setne/movzbl turn the not-equal flag into 0 or 1.
// The flags register is declared as killed.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // The branch and the setb below both consume the flags produced by cmpq.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
13153 
// Unsigned long compares.  The emitted cmpq matches the signed form; the
// difference is that the result is typed rFlagsRegU rather than rFlagsReg.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
13166 
// Unsigned long compare of a register with a 32-bit immediate.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
13177 
// Unsigned long compare of a register with a long loaded from memory; the
// LoadL is folded into the cmpq as a memory operand.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
13188 
// Unsigned long compare against the zero constant: emitted as a testq of the
// register with itself, producing rFlagsRegU.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
13199 
// Compare of a byte loaded from memory (LoadB) with an 8-bit immediate,
// emitted as a cmpb directly on the memory operand.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb    $mem, $imm" %}
  ins_encode %{
    __ cmpb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
13209 
// Bit test of a byte loaded with LoadUB against an immU7 mask, compared with
// zero: emitted as a testb directly on the memory operand.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# ubyte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
13219 
// Bit test of a byte loaded with LoadB against an immI8 mask, compared with
// zero: emitted as a testb directly on the memory operand.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# byte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
13229 
13230 //----------Max and Min--------------------------------------------------------
13231 // Min Instructions
13232 
// Helper with no match rule of its own; it is referenced from the expand of
// minI_rReg.  Moves src into dst when the incoming flags say greater.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register other = $src$$Register;
    __ cmovl(Assembler::greater, result, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
13243 
13244 
// Integer minimum, computed in place in dst.  Expands into compI_rReg
// (defined elsewhere in this file; presumably a cmpl of dst and src) followed
// by cmovI_reg_g, which replaces dst with src when the flags say greater.
instruct minI_rReg(rRegI dst, rRegI src)
%{
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
13256 
// Helper with no match rule of its own; it is referenced from the expand of
// maxI_rReg.  Moves src into dst when the incoming flags say less.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register other = $src$$Register;
    __ cmovl(Assembler::less, result, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
13267 
13268 
// Integer maximum, computed in place in dst.  Expands into compI_rReg
// (defined elsewhere in this file; presumably a cmpl of dst and src) followed
// by cmovI_reg_l, which replaces dst with src when the flags say less.
instruct maxI_rReg(rRegI dst, rRegI src)
%{
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
13280 
13281 // ============================================================================
13282 // Branch Instructions
13283 
// Unconditional direct jump (long form).  The label is a relative address
// measured from JMP+1; the declared size is 5 bytes.
instruct jmpDir(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp     $labl" %}
  size(5);
  ins_encode %{
    Label* target = $labl$$label;
    __ jmp(*target, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}
13299 
// Conditional direct jump (long form) on signed-compare flags.  The label is
// a relative address measured from Jcc+1; the declared size is 6 bytes.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl" %}
  size(6);
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jcc(cc, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13315 
// Conditional direct jump (long form) matching CountedLoopEnd.  The label is
// a relative address measured from Jcc+1; the declared size is 6 bytes.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jcc(cc, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13331 
// Conditional direct jump (long form) on unsigned-compare flags
// (cmpOpU condition, rFlagsRegU flags); the declared size is 6 bytes.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jcc(cc, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13346 
// Conditional direct jump (long form) for a cmpOpUCF condition on
// rFlagsRegUCF flags.  Note the cost of 200, lower than the other long
// conditional jumps in this section.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jcc(cc, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13360 
// Conditional direct jump (long form) for a cmpOpUCF2 condition, which the
// encoding expects to be either equal or notEqual.  The parity flag is
// tested first: for notEqual it also branches to the target, for equal it
// skips the branch.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u    $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u    done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    const int cc = $cop$$cmpcode;
    Label* target = $labl$$label;
    if (cc == Assembler::equal) {
      // Parity set means the equal branch must not be taken.
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jcc(Assembler::equal, *target, false);
      __ bind(skip);
    } else if (cc == Assembler::notEqual) {
      // Parity set or not-equal: either one goes to the target.
      __ jcc(Assembler::parity, *target, false);
      __ jcc(Assembler::notEqual, *target, false);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
13392 
13393 // ============================================================================
13394 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
13395 // superklass array for an instance of the superklass.  Set a hidden
13396 // internal cache on a hit (cache is checked with exposed code in
13397 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13398 // encoding ALSO sets flags.
13399 
// Register-result form of the secondary-supers scan.  Operands are pinned to
// rdi (result), rsi (sub), rax (super) and rcx (killed); flags are killed too.
// The emitted code comes from enc_PartialSubtypeCheck (defined elsewhere in
// this file); the format text below is the listing shown for it.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
    "miss:\t" %}

  opcode(0x1); // Force a XOR of RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
13421 
// Flags-result form of the secondary-supers scan: matches the check when it
// is compared against the null pointer constant, so only the flags are the
// result.  rdi (result) and rcx are declared as killed.  Shares
// enc_PartialSubtypeCheck with partialSubtypeCheck but selects opcode 0x0.
instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
                                     rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                                     immP0 zero,
                                     rdi_RegP result)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL rcx, KILL result);

  ins_cost(1000);
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
            "jne,s   miss\t\t# Missed: flags nz\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
    "miss:\t" %}

  opcode(0x0); // No need to XOR RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
13443 
13444 // ============================================================================
13445 // Branch Instructions -- short offset versions
13446 //
13447 // These instructions are used to replace jumps of a long offset (the default
13448 // match) with jumps of a shorter offset.  These instructions are all tagged
13449 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13450 // match rules in general matching.  Instead, the ADLC generates a conversion
13451 // method in the MachNode which can be used to do in-place replacement of the
13452 // long variant with the shorter variant.  The compiler will determine if a
13453 // branch can be taken by the is_short_branch_offset() predicate in the machine
13454 // specific code section of the file.
13455 
// Unconditional direct jump, short-offset replacement for jmpDir.  The label
// is a relative address measured from JMP+1; the declared size is 2 bytes.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp,s   $labl" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
13471 
// Conditional direct jump, short-offset replacement for jmpCon.  The label
// is a relative address measured from Jcc+1; the declared size is 2 bytes.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s   $labl" %}
  size(2);
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
13487 
// Short-offset replacement for jmpLoopEnd (matches CountedLoopEnd).  The
// label is a relative address measured from Jcc+1; declared size is 2 bytes.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s   $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
13503 
// Short-offset replacement for jmpConU: conditional jump on unsigned-compare
// flags (cmpOpU condition, rFlagsRegU flags); declared size is 2 bytes.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
13519 
// Short-offset replacement for jmpConUCF: conditional jump for a cmpOpUCF
// condition on rFlagsRegUCF flags; declared size is 2 bytes.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  ins_encode %{
    Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
13534 
// Short-offset replacement for jmpConUCF2.  The condition is expected to be
// either equal or notEqual; two short branches are emitted in both cases,
// matching the declared size of 4 bytes.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s  $labl\n\t"
      $$emit$$"j$cop,u,s  $labl"
    } else {
      $$emit$$"jp,u,s  done\n\t"
      $$emit$$"j$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    const int cc = $cop$$cmpcode;
    Label* target = $labl$$label;
    if (cc == Assembler::equal) {
      // Parity set means the equal branch must not be taken.
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jccb(Assembler::equal, *target);
      __ bind(skip);
    } else if (cc == Assembler::notEqual) {
      // Parity set or not-equal: either one goes to the target.
      __ jccb(Assembler::parity, *target);
      __ jccb(Assembler::notEqual, *target);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
13568 
13569 // ============================================================================
13570 // inlined locking and unlocking
13571 
// FastLock variant selected only when the current compilation uses RTM
// (see the predicate).  box is pinned to rbx and is USE_KILL; tmp (rax),
// scr (rdx), cx1 and cx2 are temporaries.  The result is left in the flags.
instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // Passes the RTM counters, the method data of the method being compiled
    // and the profile_rtm setting.  The literal true sits where cmpFastLock
    // passes false (presumably the use-RTM switch; confirm against fast_lock).
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, r15_thread,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}
13587 
// FastLock variant selected when the current compilation does not use RTM
// (see the predicate).  box is pinned to rbx and is USE_KILL; tmp (rax) and
// scr are temporaries.  The result is left in the flags.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    // Same fast_lock entry point as cmpFastLockRTM, with noreg, nullptr and
    // false filling the argument slots that variant uses for RTM state.
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, r15_thread, nullptr, nullptr, nullptr, false, false);
  %}
  ins_pipe(pipe_slow);
%}
13600 
// FastUnlock for both RTM and non-RTM compilations: there is no predicate,
// and the use_rtm setting is forwarded to fast_unlock at emit time.  box is
// pinned to rax and is USE_KILL; tmp is a temporary.  Result is in the flags.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}
13611 
13612 
13613 // ============================================================================
13614 // Safepoint Instructions
// Safepoint poll: reads from the address held in the poll register with a
// testl against rax, tagged with a poll_type relocation.  Flags are killed.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl   rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    // Remember where the instruction starts so the assert can decode it.
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
13632 
// MaskAll from a long source into a k-register.  The vector length of this
// node is passed to vector_maskall_operation as the mask length.
instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    int lane_count = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
13642 
// MaskAll from an int source when the vector length exceeds 32 (predicate).
// The int is first moved into the long temporary with movslq, and that
// temporary is what vector_maskall_operation receives.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    Register widened = $tmp$$Register;
    int lane_count = Matcher::vector_length(this);
    __ movslq(widened, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, widened, lane_count);
  %}
  ins_pipe( pipe_slow );
%}
13655 
13656 // ============================================================================
13657 // Procedure Call/Return Instructions
13658 // Call Java Static Instruction
13659 // Note: If this code changes, the corresponding ret_addr_offset() and
13660 //       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  // Encoding classes are applied in the order listed; all three are defined
  // elsewhere in this file.
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
13672 
13673 // Call Java Dynamic Instruction
13674 // Note: If this code changes, the corresponding ret_addr_offset() and
13675 //       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // The listing shows a movq of Universe::non_oop_word() into rax ahead of
  // the call; the instructions themselves come from Java_Dynamic_Call.
  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  // Encoding classes are applied in the order listed; all three are defined
  // elsewhere in this file.
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
13688 
// Call Runtime Instruction
// Matches CallRuntime.  Encoded as clear_avx followed by Java_To_Runtime
// (both encoding classes are defined elsewhere in this file).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
13700 
// Call runtime without safepoint
// Matches CallLeaf.  Uses the same encoding sequence as CallRuntimeDirect:
// clear_avx followed by Java_To_Runtime.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
13712 
// Call runtime without safepoint and with vector arguments
// Matches CallLeafVector.  Unlike the other runtime calls in this section,
// the encoding omits clear_avx (presumably so the vector arguments are left
// intact; confirm against the clear_avx encoding class).
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
13724 
// Leaf runtime call without safepoint, indirect form.
// Chosen when the call node has no entry point (predicate); the address to
// call is supplied in the target register instead.
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == nullptr);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
     Register callee = $target$$Register;
     __ call(callee);
  %}

  ins_pipe(pipe_slow);
%}
13740 
// Leaf runtime call without safepoint, direct form: chosen when the call
// node has a non-null entry point (predicate).  Encoded as clear_avx followed
// by Java_To_Runtime, both defined elsewhere in this file.
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != nullptr);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
13752 
// Return Instruction
// Remove the return address & jump to it.
// NOTE(review): this comment used to state that a nop is always emitted
// after the ret to leave room for safepoint patching, but the encoding below
// emits only ret(0) -- confirm whether any padding is still required.
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    __ ret(0);
  %}
  ins_pipe(pipe_jmp);
%}
13767 
// Tail call: an interprocedural jump from a runtime stub into Java code.
// The return address is left in place, so the jump target eventually returns
// to our caller (TailJump, below, is the form that removes it).
// method_ptr is pinned to rbx and is not referenced by the encoding.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  ins_cost(300);
  format %{ "jmp     $jump_target\t# rbx holds method" %}
  ins_encode %{
    Register destination = $jump_target$$Register;
    __ jmp(destination);
  %}
  ins_pipe(pipe_jmp);
%}
13783 
// Tail jump: pops the return address into rdx, then jumps to the target.
// (TailCall, above, is the form that keeps the return address.)
// ex_oop is pinned to rax and is not referenced by the encoding.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  ins_encode %{
    Register discarded_ret_addr = as_Register(RDX_enc);
    Register destination = $jump_target$$Register;
    __ popq(discarded_ret_addr);
    __ jmp(destination);
  %}
  ins_pipe(pipe_jmp);
%}
13799 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
// The operand is pinned to rax; size(0) and the empty ins_encode make this a
// zero-length instruction that only tells the allocator where the oop lives.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
13813 
// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
// The jump itself is produced by the enc_rethrow encoding class, which is
// defined elsewhere in this file.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp     rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe(pipe_jmp);
%}
13826 
13827 // ============================================================================
13828 // This name is KNOWN by the ADLC and cannot be changed.
13829 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13830 // for this guy.
// ThreadLocal is matched to an operand pinned to r15, so no instruction is
// needed: size(0) with an empty encoding.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
13840 
13841 
13842 //----------PEEPHOLE RULES-----------------------------------------------------
13843 // These must follow all instruction definitions as they use the names
13844 // defined in the instructions definitions.
13845 //
13846 // peeppredicate ( rule_predicate );
// // the predicate that must hold for the peephole rule to apply; otherwise the rule is ignored
13848 //
13849 // peepmatch ( root_instr_name [preceding_instruction]* );
13850 //
13851 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file. The method has the
// // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, a function that returns a new node of the
// // kind defined in peepreplace when invoked, and the rules of the nodes
// // appearing in the corresponding peepmatch. The function returns true if
// // successful, else it returns false.
13860 //
13861 // peepconstraint %{
13862 // (instruction_number.operand_name relational_op instruction_number.operand_name
13863 //  [, ...] );
13864 // // instruction numbers are zero-based using left to right order in peepmatch
13865 //
13866 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13867 // // provide an instruction_number.operand_name for each operand that appears
13868 // // in the replacement instruction's match rule
13869 //
13870 // ---------VM FLAGS---------------------------------------------------------
13871 //
13872 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13873 //
13874 // Each peephole rule is given an identifying number starting with zero and
13875 // increasing by one in the order seen by the parser.  An individual peephole
13876 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13877 // on the command-line.
13878 //
13879 // ---------CURRENT LIMITATIONS----------------------------------------------
13880 //
13881 // Only transformations inside a basic block (do we need more for peephole)
13882 //
13883 // ---------EXAMPLE----------------------------------------------------------
13884 //
13885 // // pertinent parts of existing instructions in architecture description
13886 // instruct movI(rRegI dst, rRegI src)
13887 // %{
13888 //   match(Set dst (CopyI src));
13889 // %}
13890 //
13891 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
13892 // %{
13893 //   match(Set dst (AddI dst src));
13894 //   effect(KILL cr);
13895 // %}
13896 //
13897 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
13898 // %{
13899 //   match(Set dst (AddI dst src));
13900 // %}
13901 //
13902 // 1. Simple replacement
13903 // - Only match adjacent instructions in same basic block
13904 // - Only equality constraints
13905 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
13906 // - Only one replacement instruction
13907 //
13908 // // Change (inc mov) to lea
13909 // peephole %{
13910 //   // lea should only be emitted when beneficial
13911 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13912 //   // increment preceded by register-register move
13913 //   peepmatch ( incI_rReg movI );
13914 //   // require that the destination register of the increment
13915 //   // match the destination register of the move
13916 //   peepconstraint ( 0.dst == 1.dst );
13917 //   // construct a replacement instruction that sets
13918 //   // the destination to ( move's source register + one )
13919 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13920 // %}
13921 //
13922 // 2. Procedural replacement
// - More flexible finding relevant nodes
13924 // - More flexible constraints
13925 // - More flexible transformations
13926 // - May utilise architecture-dependent API more effectively
13927 // - Currently only one replacement instruction due to adlc parsing capabilities
13928 //
13929 // // Change (inc mov) to lea
13930 // peephole %{
13931 //   // lea should only be emitted when beneficial
13932 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13933 //   // the rule numbers of these nodes inside are passed into the function below
13934 //   peepmatch ( incI_rReg movI );
13935 //   // the method that takes the responsibility of transformation
13936 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the function above
13939 //   peepreplace ( leaI_rReg_immI() );
13940 // %}
13941 
// These instructions are not matched by the matcher but are used by the peephole
// Peephole-only replacement (predicate is false, so the matcher never picks
// it): int add of two registers expressed as a leal.
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal    $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register first = $src1$$Register;
    Register second = $src2$$Register;
    // rbp and r13 are kept out of the base position (presumably because they
    // cannot be encoded as a base without a displacement -- confirm).
    const bool first_unusable_as_base = (first == rbp || first == r13);
    if (first_unusable_as_base) {
      assert(second != rbp && second != r13, "");
      __ leal(result, Address(second, first, Address::times_1));
    } else {
      __ leal(result, Address(first, second, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
13961 
// Peephole-only replacement (predicate is false): int add of a register and
// an immediate expressed as a leal with the immediate as displacement.
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal    $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base = $src1$$Register;
    __ leal(result, Address(base, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
13972 
// Peephole-only replacement (predicate is false): int left shift by an immI2
// amount expressed as a leal.  A times_2 scale is emitted as src + src
// unless src is rbp or r13; every other case uses a scaled index with no
// base register.
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal    $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    Register index = $src$$Register;
    Register result = $dst$$Register;
    const bool unusable_as_base = (index == rbp || index == r13);
    if (factor != Address::times_2 || unusable_as_base) {
      __ leal(result, Address(noreg, index, factor));
    } else {
      __ leal(result, Address(index, index, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
13989 
13990 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
13991 %{
13992   predicate(false);
13993   match(Set dst (AddL src1 src2));
13994   format %{ "leaq    $dst, [$src1 + $src2]" %}
13995   ins_encode %{
13996     Register dst = $dst$$Register;
13997     Register src1 = $src1$$Register;
13998     Register src2 = $src2$$Register;
13999     if (src1 != rbp && src1 != r13) {
14000       __ leaq(dst, Address(src1, src2, Address::times_1));
14001     } else {
14002       assert(src2 != rbp && src2 != r13, "");
14003       __ leaq(dst, Address(src2, src1, Address::times_1));
14004     }
14005   %}
14006   ins_pipe(ialu_reg_reg);
14007 %}
14008 
14009 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
14010 %{
14011   predicate(false);
14012   match(Set dst (AddL src1 src2));
14013   format %{ "leaq    $dst, [$src1 + $src2]" %}
14014   ins_encode %{
14015     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
14016   %}
14017   ins_pipe(ialu_reg_reg);
14018 %}
14019 
14020 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
14021 %{
14022   predicate(false);
14023   match(Set dst (LShiftL src shift));
14024   format %{ "leaq    $dst, [$src << $shift]" %}
14025   ins_encode %{
14026     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
14027     Register src = $src$$Register;
14028     if (scale == Address::times_2 && src != rbp && src != r13) {
14029       __ leaq($dst$$Register, Address(src, src, Address::times_1));
14030     } else {
14031       __ leaq($dst$$Register, Address(noreg, src, scale));
14032     }
14033   %}
14034   ins_pipe(ialu_reg_reg);
14035 %}
14036 
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial in
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are only
// generally beneficial for processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
14043 
14044 peephole
14045 %{
14046   peeppredicate(VM_Version::supports_fast_2op_lea());
14047   peepmatch (addI_rReg);
14048   peepprocedure (lea_coalesce_reg);
14049   peepreplace (leaI_rReg_rReg_peep());
14050 %}
14051 
14052 peephole
14053 %{
14054   peeppredicate(VM_Version::supports_fast_2op_lea());
14055   peepmatch (addI_rReg_imm);
14056   peepprocedure (lea_coalesce_imm);
14057   peepreplace (leaI_rReg_immI_peep());
14058 %}
14059 
14060 peephole
14061 %{
14062   peeppredicate(VM_Version::supports_fast_3op_lea() ||
14063                 VM_Version::is_intel_cascade_lake());
14064   peepmatch (incI_rReg);
14065   peepprocedure (lea_coalesce_imm);
14066   peepreplace (leaI_rReg_immI_peep());
14067 %}
14068 
14069 peephole
14070 %{
14071   peeppredicate(VM_Version::supports_fast_3op_lea() ||
14072                 VM_Version::is_intel_cascade_lake());
14073   peepmatch (decI_rReg);
14074   peepprocedure (lea_coalesce_imm);
14075   peepreplace (leaI_rReg_immI_peep());
14076 %}
14077 
14078 peephole
14079 %{
14080   peeppredicate(VM_Version::supports_fast_2op_lea());
14081   peepmatch (salI_rReg_immI2);
14082   peepprocedure (lea_coalesce_imm);
14083   peepreplace (leaI_rReg_immI2_peep());
14084 %}
14085 
14086 peephole
14087 %{
14088   peeppredicate(VM_Version::supports_fast_2op_lea());
14089   peepmatch (addL_rReg);
14090   peepprocedure (lea_coalesce_reg);
14091   peepreplace (leaL_rReg_rReg_peep());
14092 %}
14093 
14094 peephole
14095 %{
14096   peeppredicate(VM_Version::supports_fast_2op_lea());
14097   peepmatch (addL_rReg_imm);
14098   peepprocedure (lea_coalesce_imm);
14099   peepreplace (leaL_rReg_immL32_peep());
14100 %}
14101 
14102 peephole
14103 %{
14104   peeppredicate(VM_Version::supports_fast_3op_lea() ||
14105                 VM_Version::is_intel_cascade_lake());
14106   peepmatch (incL_rReg);
14107   peepprocedure (lea_coalesce_imm);
14108   peepreplace (leaL_rReg_immL32_peep());
14109 %}
14110 
14111 peephole
14112 %{
14113   peeppredicate(VM_Version::supports_fast_3op_lea() ||
14114                 VM_Version::is_intel_cascade_lake());
14115   peepmatch (decL_rReg);
14116   peepprocedure (lea_coalesce_imm);
14117   peepreplace (leaL_rReg_immL32_peep());
14118 %}
14119 
14120 peephole
14121 %{
14122   peeppredicate(VM_Version::supports_fast_2op_lea());
14123   peepmatch (salL_rReg_immI2);
14124   peepprocedure (lea_coalesce_imm);
14125   peepreplace (leaL_rReg_immI2_peep());
14126 %}
14127 
14128 //----------SMARTSPILL RULES---------------------------------------------------
14129 // These must follow all instruction definitions as they use the names
14130 // defined in the instructions definitions.