1 //
    2 // Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
// Every general register is declared as a pair of entries: the register name
// itself, mapped to the register's VMReg, and a companion "_H" entry mapped to
// the following VMReg (->next()).  Both entries of a pair carry the same
// hardware encoding.
reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

// RBX is save-on-call for compiled Java code but save-on-entry in the C
// convention.
reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// The stack pointer is never saved by the allocator (NS in both conventions).
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

// RSI and RDI differ only in their C convention save type: save-on-entry when
// building for _WIN64, save-on-call otherwise.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

// R8-R11: save-on-call in both conventions.
reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15: save-on-call for compiled Java code, save-on-entry in the C
// convention.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 
  132 // Floating Point Registers
  133 
  134 // Specify priority of register selection within phases of register
  135 // allocation.  Highest priority is first.  A useful heuristic is to
  136 // give registers a low priority when they are required by machine
  137 // instructions, like EAX and EDX on I486, and choose no-save registers
  138 // before save-on-call, & save-on-call before save-on-entry.  Registers
  139 // which participate in fixed calling sequences should come last.
  140 // Registers which are used as pairs must fall on an even boundary.
  141 
// Allocation order for the general registers, highest priority first.
// Each register is listed together with its _H companion so that pairs stay
// adjacent; RSP comes last.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);
  158 
  159 
  160 //----------Architecture Description Register Classes--------------------------
  161 // Several register classes are automatically defined based upon information in
  162 // this architecture description.
  163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  164 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  165 //
  166 
// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers: every general register together with
// its _H companion, RSP and R15 included.  adlc generates _ALL_REG_mask from
// this class; reg_mask_init() uses that mask as the starting point for the
// pointer and long masks.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);
  187 
  188 // Class for all int registers
  189 reg_class all_int_reg(RAX
  190                       RDX,
  191                       RBP,
  192                       RDI,
  193                       RSI,
  194                       RCX,
  195                       RBX,
  196                       R8,
  197                       R9,
  198                       R10,
  199                       R11,
  200                       R12,
  201                       R13,
  202                       R14);
  203 
// The classes below do not list registers; each one returns a RegMask that
// reg_mask_init() computes at run time.

// Class for all pointer registers: all_reg, minus RBP when
// PreserveFramePointer is set and minus R12 when compressed oops are in use.
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP and R15)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP, R15 and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX, RSP and R15)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, RSP and R15)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (same mask contents as ptr_reg: excludes RSP
// and R15)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX, RSP and R15)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX, RSP and R15)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP, R13, RSP and R15)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers: all_int_reg (which has no RSP), minus RBP when
// PreserveFramePointer is set and minus R12 when compressed oops are in use.
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP, R13 and RSP)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
  268 
// Singleton classes pin an operand to one specific register.  Pointer and
// long singletons list the register together with its _H companion; int
// singletons list only the register itself.

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);
  313 
  314 // Singleton class for instruction pointer
  315 // reg_class ip_reg(RIP);
  316 
  317 %}
  318 
  319 //----------SOURCE BLOCK-------------------------------------------------------
  320 // This is a block of C++ code which provides values, functions, and
  321 // definitions necessary in the rest of the architecture description
  322 
  323 source_hpp %{
  324 
  325 #include "peephole_x86_64.hpp"
  326 
  327 %}
  328 
  329 // Register masks
  330 source_hpp %{
  331 
// Register masks that are computed at run time by reg_mask_init() and
// returned by the mask-backed register classes of the register block.
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// The corresponding register mask OR-ed with STACK_OR_STACK_SLOTS_mask().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Read-only accessors for the three stack-or-register masks above.
inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
  354 
  355 %}
  356 
  357 source %{
// Relocation "format" arguments passed to the emit_*_reloc helpers below.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Assembler shorthand: "__ insn(...)" expands to "_masm.insn(...)", so it is
// usable wherever a MacroAssembler named _masm is in scope.
#define __ _masm.
  362 
// Storage for the register masks declared in the header block.  They are
// default-constructed here and filled in by reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
  380 
  381 static bool need_r12_heapbase() {
  382   return UseCompressedOops;
  383 }
  384 
  385 void reg_mask_init() {
  386   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
  387   // We derive a number of subsets from it.
  388   _ANY_REG_mask = _ALL_REG_mask;
  389 
  390   if (PreserveFramePointer) {
  391     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  392     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  393   }
  394   if (need_r12_heapbase()) {
  395     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  396     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
  397   }
  398 
  399   _PTR_REG_mask = _ANY_REG_mask;
  400   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
  401   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
  402   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
  403   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
  404 
  405   _STACK_OR_PTR_REG_mask = _PTR_REG_mask;
  406   _STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  407 
  408   _PTR_REG_NO_RBP_mask = _PTR_REG_mask;
  409   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  410   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  411 
  412   _PTR_NO_RAX_REG_mask = _PTR_REG_mask;
  413   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  414   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  415 
  416   _PTR_NO_RAX_RBX_REG_mask = _PTR_NO_RAX_REG_mask;
  417   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  418   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
  419 
  420   _LONG_REG_mask = _PTR_REG_mask;
  421   _STACK_OR_LONG_REG_mask = _LONG_REG_mask;
  422   _STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  423 
  424   _LONG_NO_RAX_RDX_REG_mask = _LONG_REG_mask;
  425   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  426   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  427   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  428   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
  429 
  430   _LONG_NO_RCX_REG_mask = _LONG_REG_mask;
  431   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  432   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
  433 
  434   _LONG_NO_RBP_R13_REG_mask = _LONG_REG_mask;
  435   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  436   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  437   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  438   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
  439 
  440   _INT_REG_mask = _ALL_INT_REG_mask;
  441   if (PreserveFramePointer) {
  442     _INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  443   }
  444   if (need_r12_heapbase()) {
  445     _INT_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  446   }
  447 
  448   _STACK_OR_INT_REG_mask = _INT_REG_mask;
  449   _STACK_OR_INT_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  450 
  451   _INT_NO_RAX_RDX_REG_mask = _INT_REG_mask;
  452   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  453   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  454 
  455   _INT_NO_RCX_REG_mask = _INT_REG_mask;
  456   _INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  457 
  458   _INT_NO_RBP_R13_REG_mask = _INT_REG_mask;
  459   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  460   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  461 
  462   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  463   // from the float_reg_legacy/float_reg_evex register class.
  464   _FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
  465 }
  466 
  467 static bool generate_vzeroupper(Compile* C) {
  468   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
  469 }
  470 
  471 static int clear_avx_size() {
  472   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {
  493   int offset = 13; // movq r10,#addr; callq (r10)
  494   if (this->ideal_Opcode() != Op_CallLeafVector) {
  495     offset += clear_avx_size();
  496   }
  497   return offset;
  498 }
  499 //
  500 // Compute padding required for nodes which need alignment
  501 //
  502 
  503 // The address of the call instruction needs to be 4-byte aligned to
  504 // ensure that it does not span a cache line so that it can be patched.
  505 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  506 {
  507   current_offset += clear_avx_size(); // skip vzeroupper
  508   current_offset += 1; // skip call opcode byte
  509   return align_up(current_offset, alignment_required()) - current_offset;
  510 }
  511 
  512 // The address of the call instruction needs to be 4-byte aligned to
  513 // ensure that it does not span a cache line so that it can be patched.
  514 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  515 {
  516   current_offset += clear_avx_size(); // skip vzeroupper
  517   current_offset += 11; // skip movq instruction + call opcode byte
  518   return align_up(current_offset, alignment_required()) - current_offset;
  519 }
  520 
  521 // EMIT_RM()
  522 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  523   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
  524   cbuf.insts()->emit_int8(c);
  525 }
  526 
  527 // EMIT_CC()
  528 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  529   unsigned char c = (unsigned char) (f1 | f2);
  530   cbuf.insts()->emit_int8(c);
  531 }
  532 
  533 // EMIT_OPCODE()
  534 void emit_opcode(CodeBuffer &cbuf, int code) {
  535   cbuf.insts()->emit_int8((unsigned char) code);
  536 }
  537 
  538 // EMIT_OPCODE() w/ relocation information
  539 void emit_opcode(CodeBuffer &cbuf,
  540                  int code, relocInfo::relocType reloc, int offset, int format)
  541 {
  542   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
  543   emit_opcode(cbuf, code);
  544 }
  545 
  546 // EMIT_D8()
  547 void emit_d8(CodeBuffer &cbuf, int d8) {
  548   cbuf.insts()->emit_int8((unsigned char) d8);
  549 }
  550 
  551 // EMIT_D16()
  552 void emit_d16(CodeBuffer &cbuf, int d16) {
  553   cbuf.insts()->emit_int16(d16);
  554 }
  555 
  556 // EMIT_D32()
  557 void emit_d32(CodeBuffer &cbuf, int d32) {
  558   cbuf.insts()->emit_int32(d32);
  559 }
  560 
  561 // EMIT_D64()
  562 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
  563   cbuf.insts()->emit_int64(d64);
  564 }
  565 
  566 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  567 void emit_d32_reloc(CodeBuffer& cbuf,
  568                     int d32,
  569                     relocInfo::relocType reloc,
  570                     int format)
  571 {
  572   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
  573   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  574   cbuf.insts()->emit_int32(d32);
  575 }
  576 
  577 // emit 32 bit value and construct relocation entry from RelocationHolder
  578 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
  579 #ifdef ASSERT
  580   if (rspec.reloc()->type() == relocInfo::oop_type &&
  581       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
  582     assert(Universe::heap()->is_in((address)(intptr_t)d32), "should be real oop");
  583     assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)), "cannot embed broken oops in code");
  584   }
  585 #endif
  586   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  587   cbuf.insts()->emit_int32(d32);
  588 }
  589 
  590 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
  591   address next_ip = cbuf.insts_end() + 4;
  592   emit_d32_reloc(cbuf, (int) (addr - next_ip),
  593                  external_word_Relocation::spec(addr),
  594                  RELOC_DISP32);
  595 }
  596 
  597 
  598 // emit 64 bit value and construct relocation entry from relocInfo::relocType
  599 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
  600   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  601   cbuf.insts()->emit_int64(d64);
  602 }
  603 
  604 // emit 64 bit value and construct relocation entry from RelocationHolder
  605 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
  606 #ifdef ASSERT
  607   if (rspec.reloc()->type() == relocInfo::oop_type &&
  608       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
  609     assert(Universe::heap()->is_in((address)d64), "should be real oop");
  610     assert(oopDesc::is_oop(cast_to_oop(d64)), "cannot embed broken oops in code");
  611   }
  612 #endif
  613   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  614   cbuf.insts()->emit_int64(d64);
  615 }
  616 
  617 // Access stack slot for load or store
  618 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
  619 {
  620   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
  621   if (-0x80 <= disp && disp < 0x80) {
  622     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
  623     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  624     emit_d8(cbuf, disp);     // Displacement  // R/M byte
  625   } else {
  626     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
  627     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  628     emit_d32(cbuf, disp);     // Displacement // R/M byte
  629   }
  630 }
  631 
  632    // rRegI ereg, memory mem) %{    // emit_reg_mem
  633 void encode_RegMem(CodeBuffer &cbuf,
  634                    int reg,
  635                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
  636 {
  637   assert(disp_reloc == relocInfo::none, "cannot have disp");
  638   int regenc = reg & 7;
  639   int baseenc = base & 7;
  640   int indexenc = index & 7;
  641 
  642   // There is no index & no scale, use form without SIB byte
  643   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
  644     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  645     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  646       emit_rm(cbuf, 0x0, regenc, baseenc); // *
  647     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  648       // If 8-bit displacement, mode 0x1
  649       emit_rm(cbuf, 0x1, regenc, baseenc); // *
  650       emit_d8(cbuf, disp);
  651     } else {
  652       // If 32-bit displacement
  653       if (base == -1) { // Special flag for absolute address
  654         emit_rm(cbuf, 0x0, regenc, 0x5); // *
  655         if (disp_reloc != relocInfo::none) {
  656           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  657         } else {
  658           emit_d32(cbuf, disp);
  659         }
  660       } else {
  661         // Normal base + offset
  662         emit_rm(cbuf, 0x2, regenc, baseenc); // *
  663         if (disp_reloc != relocInfo::none) {
  664           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  665         } else {
  666           emit_d32(cbuf, disp);
  667         }
  668       }
  669     }
  670   } else {
  671     // Else, encode with the SIB byte
  672     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  673     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  674       // If no displacement
  675       emit_rm(cbuf, 0x0, regenc, 0x4); // *
  676       emit_rm(cbuf, scale, indexenc, baseenc);
  677     } else {
  678       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  679         // If 8-bit displacement, mode 0x1
  680         emit_rm(cbuf, 0x1, regenc, 0x4); // *
  681         emit_rm(cbuf, scale, indexenc, baseenc);
  682         emit_d8(cbuf, disp);
  683       } else {
  684         // If 32-bit displacement
  685         if (base == 0x04 ) {
  686           emit_rm(cbuf, 0x2, regenc, 0x4);
  687           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
  688         } else {
  689           emit_rm(cbuf, 0x2, regenc, 0x4);
  690           emit_rm(cbuf, scale, indexenc, baseenc); // *
  691         }
  692         if (disp_reloc != relocInfo::none) {
  693           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  694         } else {
  695           emit_d32(cbuf, disp);
  696         }
  697       }
  698     }
  699   }
  700 }
  701 
  702 // This could be in MacroAssembler but it's fairly C2 specific
  703 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  704   Label exit;
  705   __ jccb(Assembler::noParity, exit);
  706   __ pushf();
  707   //
  708   // comiss/ucomiss instructions set ZF,PF,CF flags and
  709   // zero OF,AF,SF for NaN values.
  710   // Fixup flags by zeroing ZF,PF so that compare of NaN
  711   // values returns 'less than' result (CF is set).
  712   // Leave the rest of flags unchanged.
  713   //
  714   //    7 6 5 4 3 2 1 0
  715   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  716   //    0 0 1 0 1 0 1 1   (0x2B)
  717   //
  718   __ andq(Address(rsp, 0), 0xffffff2b);
  719   __ popf();
  720   __ bind(exit);
  721 }
  722 
  723 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  724   Label done;
  725   __ movl(dst, -1);
  726   __ jcc(Assembler::parity, done);
  727   __ jcc(Assembler::below, done);
  728   __ setb(Assembler::notEqual, dst);
  729   __ movzbl(dst, dst);
  730   __ bind(done);
  731 }
  732 
  733 // Math.min()    # Math.max()
  734 // --------------------------
  735 // ucomis[s/d]   #
  736 // ja   -> b     # a
  737 // jp   -> NaN   # NaN
  738 // jb   -> a     # b
  739 // je            #
  740 // |-jz -> a | b # a & b
  741 // |    -> a     #
  742 void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
  743                      XMMRegister a, XMMRegister b,
  744                      XMMRegister xmmt, Register rt,
  745                      bool min, bool single) {
  746 
  747   Label nan, zero, below, above, done;
  748 
  749   if (single)
  750     __ ucomiss(a, b);
  751   else
  752     __ ucomisd(a, b);
  753 
  754   if (dst->encoding() != (min ? b : a)->encoding())
  755     __ jccb(Assembler::above, above); // CF=0 & ZF=0
  756   else
  757     __ jccb(Assembler::above, done);
  758 
  759   __ jccb(Assembler::parity, nan);  // PF=1
  760   __ jccb(Assembler::below, below); // CF=1
  761 
  762   // equal
  763   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  764   if (single) {
  765     __ ucomiss(a, xmmt);
  766     __ jccb(Assembler::equal, zero);
  767 
  768     __ movflt(dst, a);
  769     __ jmp(done);
  770   }
  771   else {
  772     __ ucomisd(a, xmmt);
  773     __ jccb(Assembler::equal, zero);
  774 
  775     __ movdbl(dst, a);
  776     __ jmp(done);
  777   }
  778 
  779   __ bind(zero);
  780   if (min)
  781     __ vpor(dst, a, b, Assembler::AVX_128bit);
  782   else
  783     __ vpand(dst, a, b, Assembler::AVX_128bit);
  784 
  785   __ jmp(done);
  786 
  787   __ bind(above);
  788   if (single)
  789     __ movflt(dst, min ? b : a);
  790   else
  791     __ movdbl(dst, min ? b : a);
  792 
  793   __ jmp(done);
  794 
  795   __ bind(nan);
  796   if (single) {
  797     __ movl(rt, 0x7fc00000); // Float.NaN
  798     __ movdl(dst, rt);
  799   }
  800   else {
  801     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
  802     __ movdq(dst, rt);
  803   }
  804   __ jmp(done);
  805 
  806   __ bind(below);
  807   if (single)
  808     __ movflt(dst, min ? a : b);
  809   else
  810     __ movdbl(dst, min ? a : b);
  811 
  812   __ bind(done);
  813 }
  814 
  815 //=============================================================================
  816 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  817 
  818 int ConstantTable::calculate_table_base_offset() const {
  819   return 0;  // absolute addressing, no offset
  820 }
  821 
  822 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// Not expected to be called on this platform: the body only traps via
// ShouldNotReachHere(). Neither 'nodes' nor 'ra_' is used.
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
  826 
// Emits nothing into 'cbuf': the constant base node has an empty encoding.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
  830 
  831 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  832   return 0;
  833 }
  834 
#ifndef PRODUCT
// Non-product builds only: prints a one-line placeholder for this node to
// 'st', noting that its encoding is empty.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
  840 
  841 
  842 //=============================================================================
  843 #ifndef PRODUCT
  844 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  845   Compile* C = ra_->C;
  846 
  847   int framesize = C->output()->frame_size_in_bytes();
  848   int bangsize = C->output()->bang_size_in_bytes();
  849   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  850   // Remove wordSize for return addr which is already pushed.
  851   framesize -= wordSize;
  852 
  853   if (C->output()->need_stack_bang(bangsize)) {
  854     framesize -= wordSize;
  855     st->print("# stack bang (%d bytes)", bangsize);
  856     st->print("\n\t");
  857     st->print("pushq   rbp\t# Save rbp");
  858     if (PreserveFramePointer) {
  859         st->print("\n\t");
  860         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  861     }
  862     if (framesize) {
  863       st->print("\n\t");
  864       st->print("subq    rsp, #%d\t# Create frame",framesize);
  865     }
  866   } else {
  867     st->print("subq    rsp, #%d\t# Create frame",framesize);
  868     st->print("\n\t");
  869     framesize -= wordSize;
  870     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  871     if (PreserveFramePointer) {
  872       st->print("\n\t");
  873       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  874       if (framesize > 0) {
  875         st->print("\n\t");
  876         st->print("addq    rbp, #%d", framesize);
  877       }
  878     }
  879   }
  880 
  881   if (VerifyStackAtCalls) {
  882     st->print("\n\t");
  883     framesize -= wordSize;
  884     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  885 #ifdef ASSERT
  886     st->print("\n\t");
  887     st->print("# stack alignment check");
  888 #endif
  889   }
  890   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  891     st->print("\n\t");
  892     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  893     st->print("\n\t");
  894     st->print("je      fast_entry\t");
  895     st->print("\n\t");
  896     st->print("call    #nmethod_entry_barrier_stub\t");
  897     st->print("\n\tfast_entry:");
  898   }
  899   st->cr();
  900 }
  901 #endif
  902 
  903 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  904   Compile* C = ra_->C;
  905   C2_MacroAssembler _masm(&cbuf);
  906 
  907   int framesize = C->output()->frame_size_in_bytes();
  908   int bangsize = C->output()->bang_size_in_bytes();
  909 
  910   if (C->clinit_barrier_on_entry()) {
  911     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  912     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  913 
  914     Label L_skip_barrier;
  915     Register klass = rscratch1;
  916 
  917     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  918     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  919 
  920     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  921 
  922     __ bind(L_skip_barrier);
  923   }
  924 
  925   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);
  926 
  927   C->output()->set_frame_complete(cbuf.insts_size());
  928 
  929   if (C->has_mach_constant_base_node()) {
  930     // NOTE: We set the table base offset here because users might be
  931     // emitted before MachConstantBaseNode.
  932     ConstantTable& constant_table = C->output()->constant_table();
  933     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  934   }
  935 }
  936 
  937 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  938 {
  939   return MachNode::size(ra_); // too many variables; just compute it
  940                               // the hard way
  941 }
  942 
  943 int MachPrologNode::reloc() const
  944 {
  945   return 0; // a large enough number
  946 }
  947 
  948 //=============================================================================
  949 #ifndef PRODUCT
  950 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  951 {
  952   Compile* C = ra_->C;
  953   if (generate_vzeroupper(C)) {
  954     st->print("vzeroupper");
  955     st->cr(); st->print("\t");
  956   }
  957 
  958   int framesize = C->output()->frame_size_in_bytes();
  959   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  960   // Remove word for return adr already pushed
  961   // and RBP
  962   framesize -= 2*wordSize;
  963 
  964   if (framesize) {
  965     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
  966     st->print("\t");
  967   }
  968 
  969   st->print_cr("popq    rbp");
  970   if (do_polling() && C->is_method_compilation()) {
  971     st->print("\t");
  972     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  973                  "ja      #safepoint_stub\t"
  974                  "# Safepoint: poll for GC");
  975   }
  976 }
  977 #endif
  978 
  979 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  980 {
  981   Compile* C = ra_->C;
  982   MacroAssembler _masm(&cbuf);
  983 
  984   if (generate_vzeroupper(C)) {
  985     // Clear upper bits of YMM registers when current compiled code uses
  986     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  987     __ vzeroupper();
  988   }
  989 
  990   int framesize = C->output()->frame_size_in_bytes();
  991   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  992   // Remove word for return adr already pushed
  993   // and RBP
  994   framesize -= 2*wordSize;
  995 
  996   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  997 
  998   if (framesize) {
  999     emit_opcode(cbuf, Assembler::REX_W);
 1000     if (framesize < 0x80) {
 1001       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 1002       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1003       emit_d8(cbuf, framesize);
 1004     } else {
 1005       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 1006       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1007       emit_d32(cbuf, framesize);
 1008     }
 1009   }
 1010 
 1011   // popq rbp
 1012   emit_opcode(cbuf, 0x58 | RBP_enc);
 1013 
 1014   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1015     __ reserved_stack_check();
 1016   }
 1017 
 1018   if (do_polling() && C->is_method_compilation()) {
 1019     MacroAssembler _masm(&cbuf);
 1020     Label dummy_label;
 1021     Label* code_stub = &dummy_label;
 1022     if (!C->output()->in_scratch_emit_size()) {
 1023       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1024       C->output()->add_stub(stub);
 1025       code_stub = &stub->entry();
 1026     }
 1027     __ relocate(relocInfo::poll_return_type);
 1028     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
 1029   }
 1030 }
 1031 
 1032 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1033 {
 1034   return MachNode::size(ra_); // too many variables; just compute it
 1035                               // the hard way
 1036 }
 1037 
 1038 int MachEpilogNode::reloc() const
 1039 {
 1040   return 2; // a large enough number
 1041 }
 1042 
 1043 const Pipeline* MachEpilogNode::pipeline() const
 1044 {
 1045   return MachNode::pipeline_class();
 1046 }
 1047 
 1048 //=============================================================================
 1049 
// Register classes used to pick the right move for a spill copy. The
// per-enumerator notes describe how rc_class() assigns them.
enum RC {
  rc_bad,    // the OptoReg name is not valid
  rc_int,    // a Register (general purpose register)
  rc_kreg,   // a KRegister
  rc_float,  // an XMMRegister
  rc_stack   // a stack slot
};
 1057 
 1058 static enum RC rc_class(OptoReg::Name reg)
 1059 {
 1060   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 1061 
 1062   if (OptoReg::is_stack(reg)) return rc_stack;
 1063 
 1064   VMReg r = OptoReg::as_VMReg(reg);
 1065 
 1066   if (r->is_Register()) return rc_int;
 1067 
 1068   if (r->is_KRegister()) return rc_kreg;
 1069 
 1070   assert(r->is_XMMRegister(), "must be");
 1071   return rc_float;
 1072 }
 1073 
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.

// Vector register-to-register move. In this file it is called for spill
// copies whose source and destination are both in the rc_float class.
// As with the other helpers here, callers pass either a code buffer or an
// output stream.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector move between a register and a stack slot at 'stack_offset'. In this
// file it is called with is_load == false for register -> stack and with
// is_load == true for stack -> register.
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
 1080 
 1081 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
 1082                                       int dst_offset, uint ireg, outputStream* st) {
 1083   if (cbuf) {
 1084     MacroAssembler _masm(cbuf);
 1085     switch (ireg) {
 1086     case Op_VecS:
 1087       __ movq(Address(rsp, -8), rax);
 1088       __ movl(rax, Address(rsp, src_offset));
 1089       __ movl(Address(rsp, dst_offset), rax);
 1090       __ movq(rax, Address(rsp, -8));
 1091       break;
 1092     case Op_VecD:
 1093       __ pushq(Address(rsp, src_offset));
 1094       __ popq (Address(rsp, dst_offset));
 1095       break;
 1096     case Op_VecX:
 1097       __ pushq(Address(rsp, src_offset));
 1098       __ popq (Address(rsp, dst_offset));
 1099       __ pushq(Address(rsp, src_offset+8));
 1100       __ popq (Address(rsp, dst_offset+8));
 1101       break;
 1102     case Op_VecY:
 1103       __ vmovdqu(Address(rsp, -32), xmm0);
 1104       __ vmovdqu(xmm0, Address(rsp, src_offset));
 1105       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 1106       __ vmovdqu(xmm0, Address(rsp, -32));
 1107       break;
 1108     case Op_VecZ:
 1109       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 1110       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 1111       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 1112       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 1113       break;
 1114     default:
 1115       ShouldNotReachHere();
 1116     }
 1117 #ifndef PRODUCT
 1118   } else {
 1119     switch (ireg) {
 1120     case Op_VecS:
 1121       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 1122                 "movl    rax, [rsp + #%d]\n\t"
 1123                 "movl    [rsp + #%d], rax\n\t"
 1124                 "movq    rax, [rsp - #8]",
 1125                 src_offset, dst_offset);
 1126       break;
 1127     case Op_VecD:
 1128       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1129                 "popq    [rsp + #%d]",
 1130                 src_offset, dst_offset);
 1131       break;
 1132      case Op_VecX:
 1133       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 1134                 "popq    [rsp + #%d]\n\t"
 1135                 "pushq   [rsp + #%d]\n\t"
 1136                 "popq    [rsp + #%d]",
 1137                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 1138       break;
 1139     case Op_VecY:
 1140       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1141                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1142                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1143                 "vmovdqu xmm0, [rsp - #32]",
 1144                 src_offset, dst_offset);
 1145       break;
 1146     case Op_VecZ:
 1147       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1148                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1149                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1150                 "vmovdqu xmm0, [rsp - #64]",
 1151                 src_offset, dst_offset);
 1152       break;
 1153     default:
 1154       ShouldNotReachHere();
 1155     }
 1156 #endif
 1157   }
 1158 }
 1159 
// Shared worker for MachSpillCopyNode::emit() and ::format().
//
// With a non-NULL 'cbuf' the move implementing this spill copy is emitted
// into it; with a NULL 'cbuf' the move is printed to 'st' (non-product builds
// only). At least one of the two must be non-NULL.
//
// The move is selected from the register classes (see rc_class) of the
// allocated source location (in(1)) and destination location (this):
//  - vector values that are not vector masks are delegated to the vec_*
//    helpers;
//  - otherwise a 64-bit move is used when both source and destination are an
//    even-aligned, adjacent (first, second) pair, and a 32-bit move is used
//    in all other cases. The kreg cases only handle the 64-bit form and do
//    nothing for any other shape.
//
// 'do_size' is not referenced in the body, and every return path yields 0.
uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(cbuf != NULL || st  != NULL, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  // Classify each half of the source and destination locations.
  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector values (but not vector masks) are moved by the vector helpers.
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      // xmm -> xmm
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      // xmm -> mem (store)
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      // mem -> xmm (load)
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq    [rsp + #%d]",
                     src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // borrow rax, parking its old value just below rsp.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl    rax, [rsp + #%d]\n\t"
                    "movl    [rsp + #%d], rax\n\t"
                    "movq    rax, [rsp - #8]",
                     src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, [rsp + #%d]\t# spill",
                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // mem -> kreg
      // Only the 64-bit pair form is handled; other shapes emit nothing.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
  #ifndef PRODUCT
        } else {
           st->print("kmovq   %s, %s\t# spill",
                       Matcher::regName[dst_first],
                       Matcher::regName[src_first]);
  #endif
        }
      }
      // NOTE(review): Unimplemented() is reached unconditionally, even after
      // the 64-bit move above was emitted or printed - confirm this is
      // intended.
      Unimplemented();
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // xmm -> kreg is rejected (debug builds assert).
      assert(false, "Illegal spilling");
      return 0;
    }
  } else if (src_first_rc == rc_kreg) {
    // kreg ->
    if (dst_first_rc == rc_stack) {
      // kreg -> mem
      // Only the 64-bit pair form is handled; other shapes emit nothing.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   [rsp + #%d] , %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // kreg -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): Unimplemented() is reached unconditionally, even after
      // the 64-bit move above was emitted or printed - confirm this is
      // intended.
      Unimplemented();
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // kreg -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // kreg -> xmm is rejected (debug builds assert).
      assert(false, "Illegal spill");
      return 0;
    }
  }

  // No source/destination class combination matched above.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
 1619 
 1620 #ifndef PRODUCT
 1621 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1622   implementation(NULL, ra_, false, st);
 1623 }
 1624 #endif
 1625 
 1626 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1627   implementation(&cbuf, ra_, false, NULL);
 1628 }
 1629 
 1630 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1631   return MachNode::size(ra_);
 1632 }
 1633 
 1634 //=============================================================================
 1635 #ifndef PRODUCT
 1636 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1637 {
 1638   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1639   int reg = ra_->get_reg_first(this);
 1640   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1641             Matcher::regName[reg], offset);
 1642 }
 1643 #endif
 1644 
 1645 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1646 {
 1647   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1648   int reg = ra_->get_encode(this);
 1649   if (offset >= 0x80) {
 1650     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1651     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1652     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1653     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1654     emit_d32(cbuf, offset);
 1655   } else {
 1656     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1657     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1658     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1659     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1660     emit_d8(cbuf, offset);
 1661   }
 1662 }
 1663 
 1664 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1665 {
 1666   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1667   return (offset < 0x80) ? 5 : 8; // REX
 1668 }
 1669 
 1670 //=============================================================================
 1671 #ifndef PRODUCT
 1672 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1673 {
 1674   if (UseCompressedClassPointers) {
 1675     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1676     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1677     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1678   } else {
 1679     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1680                  "# Inline cache check");
 1681   }
 1682   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1683   st->print_cr("\tnop\t# nops to align entry point");
 1684 }
 1685 #endif
 1686 
 1687 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1688 {
 1689   MacroAssembler masm(&cbuf);
 1690   uint insts_size = cbuf.insts_size();
 1691   if (UseCompressedClassPointers) {
 1692     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1693     masm.cmpptr(rax, rscratch1);
 1694   } else {
 1695     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1696   }
 1697 
 1698   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1699 
 1700   /* WARNING these NOPs are critical so that verified entry point is properly
 1701      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1702   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1703   if (OptoBreakpoint) {
 1704     // Leave space for int3
 1705     nops_cnt -= 1;
 1706   }
 1707   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1708   if (nops_cnt > 0)
 1709     masm.nop(nops_cnt);
 1710 }
 1711 
 1712 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1713 {
 1714   return MachNode::size(ra_); // too many variables; just compute it
 1715                               // the hard way
 1716 }
 1717 
 1718 
 1719 //=============================================================================
 1720 
 1721 const bool Matcher::supports_vector_calling_convention(void) {
 1722   if (EnableVectorSupport && UseVectorStubs) {
 1723     return true;
 1724   }
 1725   return false;
 1726 }
 1727 
 1728 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1729   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1730   int lo = XMM0_num;
 1731   int hi = XMM0b_num;
 1732   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1733   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1734   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1735   return OptoRegPair(hi, lo);
 1736 }
 1737 
 1738 // Is this branch offset short enough that a short branch can be used?
 1739 //
 1740 // NOTE: If the platform does not provide any short branch variants, then
 1741 //       this method should return false for offset 0.
 1742 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1743   // The passed offset is relative to address of the branch.
 1744   // On 86 a branch displacement is calculated relative to address
 1745   // of a next instruction.
 1746   offset -= br_size;
 1747 
 1748   // the short version of jmpConUCF2 contains multiple branches,
 1749   // making the reach slightly less
 1750   if (rule == jmpConUCF2_rule)
 1751     return (-126 <= offset && offset <= 125);
 1752   return (-128 <= offset && offset <= 127);
 1753 }
 1754 
 1755 // Return whether or not this register is ever used as an argument.
 1756 // This function is used on startup to build the trampoline stubs in
 1757 // generateOptoStub.  Registers not mentioned will be killed by the VM
 1758 // call in the trampoline, and arguments in those registers not be
 1759 // available to the callee.
 1760 bool Matcher::can_be_java_arg(int reg)
 1761 {
 1762   return
 1763     reg ==  RDI_num || reg == RDI_H_num ||
 1764     reg ==  RSI_num || reg == RSI_H_num ||
 1765     reg ==  RDX_num || reg == RDX_H_num ||
 1766     reg ==  RCX_num || reg == RCX_H_num ||
 1767     reg ==   R8_num || reg ==  R8_H_num ||
 1768     reg ==   R9_num || reg ==  R9_H_num ||
 1769     reg ==  R12_num || reg == R12_H_num ||
 1770     reg == XMM0_num || reg == XMM0b_num ||
 1771     reg == XMM1_num || reg == XMM1b_num ||
 1772     reg == XMM2_num || reg == XMM2b_num ||
 1773     reg == XMM3_num || reg == XMM3b_num ||
 1774     reg == XMM4_num || reg == XMM4b_num ||
 1775     reg == XMM5_num || reg == XMM5b_num ||
 1776     reg == XMM6_num || reg == XMM6b_num ||
 1777     reg == XMM7_num || reg == XMM7b_num;
 1778 }
 1779 
 1780 bool Matcher::is_spillable_arg(int reg)
 1781 {
 1782   return can_be_java_arg(reg);
 1783 }
 1784 
 1785 uint Matcher::int_pressure_limit()
 1786 {
 1787   return (INTPRESSURE == -1) ? _INT_REG_mask.Size() : INTPRESSURE;
 1788 }
 1789 
 1790 uint Matcher::float_pressure_limit()
 1791 {
 1792   // After experiment around with different values, the following default threshold
 1793   // works best for LCM's register pressure scheduling on x64.
 1794   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 1795   uint default_float_pressure_threshold = _FLOAT_REG_mask.Size() - dec_count;
 1796   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 1797 }
 1798 
 1799 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1800   // In 64 bit mode a code which use multiply when
 1801   // devisor is constant is faster than hardware
 1802   // DIV instruction (it uses MulHiL).
 1803   return false;
 1804 }
 1805 
 1806 // Register for DIVI projection of divmodI
 1807 RegMask Matcher::divI_proj_mask() {
 1808   return INT_RAX_REG_mask();
 1809 }
 1810 
 1811 // Register for MODI projection of divmodI
 1812 RegMask Matcher::modI_proj_mask() {
 1813   return INT_RDX_REG_mask();
 1814 }
 1815 
 1816 // Register for DIVL projection of divmodL
 1817 RegMask Matcher::divL_proj_mask() {
 1818   return LONG_RAX_REG_mask();
 1819 }
 1820 
 1821 // Register for MODL projection of divmodL
 1822 RegMask Matcher::modL_proj_mask() {
 1823   return LONG_RDX_REG_mask();
 1824 }
 1825 
 1826 // Register for saving SP into on method handle invokes. Not used on x86_64.
 1827 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1828     return NO_REG_mask();
 1829 }
 1830 
 1831 %}
 1832 
 1833 //----------ENCODING BLOCK-----------------------------------------------------
 1834 // This block specifies the encoding classes used by the compiler to
 1835 // output byte streams.  Encoding classes are parameterized macros
 1836 // used by Machine Instruction Nodes in order to generate the bit
 1837 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 1841 // which returns its register number when queried.  CONST_INTER causes
 1842 // an operand to generate a function which returns the value of the
 1843 // constant when queried.  MEMORY_INTER causes an operand to generate
 1844 // four functions which return the Base Register, the Index Register,
 1845 // the Scale Value, and the Offset Value of the operand when queried.
 1846 // COND_INTER causes an operand to generate six functions which return
 1847 // the encoding code (ie - encoding bits for the instruction)
 1848 // associated with each basic boolean condition for a conditional
 1849 // instruction.
 1850 //
 1851 // Instructions specify two basic values for encoding.  Again, a
 1852 // function is available to check if the constant displacement is an
 1853 // oop. They use the ins_encode keyword to specify their encoding
 1854 // classes (which must be a sequence of enc_class names, and their
 1855 // parameters, specified in the encoding block), and they use the
 1856 // opcode keyword to specify, in order, their primary, secondary, and
 1857 // tertiary opcode.  Only the opcode sections which a particular
 1858 // instruction needs for encoding need to be specified.
 1859 encode %{
 1860   // Build emit functions for each basic byte or larger field in the
 1861   // intel encoding scheme (opcode, rm, sib, immediate), and call them
 1862   // from C++ code in the enc_class source block.  Emit functions will
 1863   // live in the main source block for now.  In future, we can
 1864   // generalize this by adding a syntax that specifies the sizes of
 1865   // fields in an order, so that the adlc can build the emit functions
 1866   // automagically
 1867 
 1868   // Emit primary opcode
 1869   enc_class OpcP
 1870   %{
 1871     emit_opcode(cbuf, $primary);
 1872   %}
 1873 
 1874   // Emit secondary opcode
 1875   enc_class OpcS
 1876   %{
 1877     emit_opcode(cbuf, $secondary);
 1878   %}
 1879 
 1880   // Emit tertiary opcode
 1881   enc_class OpcT
 1882   %{
 1883     emit_opcode(cbuf, $tertiary);
 1884   %}
 1885 
 1886   // Emit opcode directly
 1887   enc_class Opcode(immI d8)
 1888   %{
 1889     emit_opcode(cbuf, $d8$$constant);
 1890   %}
 1891 
 1892   // Emit size prefix
 1893   enc_class SizePrefix
 1894   %{
 1895     emit_opcode(cbuf, 0x66);
 1896   %}
 1897 
 1898   enc_class reg(rRegI reg)
 1899   %{
 1900     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
 1901   %}
 1902 
 1903   enc_class reg_reg(rRegI dst, rRegI src)
 1904   %{
 1905     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
 1906   %}
 1907 
 1908   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
 1909   %{
 1910     emit_opcode(cbuf, $opcode$$constant);
 1911     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
 1912   %}
 1913 
 1914   enc_class cdql_enc(no_rax_rdx_RegI div)
 1915   %{
 1916     // Full implementation of Java idiv and irem; checks for
 1917     // special case as described in JVM spec., p.243 & p.271.
 1918     //
 1919     //         normal case                           special case
 1920     //
 1921     // input : rax: dividend                         min_int
 1922     //         reg: divisor                          -1
 1923     //
 1924     // output: rax: quotient  (= rax idiv reg)       min_int
 1925     //         rdx: remainder (= rax irem reg)       0
 1926     //
 1927     //  Code sequnce:
 1928     //
 1929     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 1930     //    5:   75 07/08                jne    e <normal>
 1931     //    7:   33 d2                   xor    %edx,%edx
 1932     //  [div >= 8 -> offset + 1]
 1933     //  [REX_B]
 1934     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 1935     //    c:   74 03/04                je     11 <done>
 1936     // 000000000000000e <normal>:
 1937     //    e:   99                      cltd
 1938     //  [div >= 8 -> offset + 1]
 1939     //  [REX_B]
 1940     //    f:   f7 f9                   idiv   $div
 1941     // 0000000000000011 <done>:
 1942     MacroAssembler _masm(&cbuf);
 1943     Label normal;
 1944     Label done;
 1945 
 1946     // cmp    $0x80000000,%eax
 1947     __ cmpl(as_Register(RAX_enc), 0x80000000);
 1948 
 1949     // jne    e <normal>
 1950     __ jccb(Assembler::notEqual, normal);
 1951 
 1952     // xor    %edx,%edx
 1953     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 1954 
 1955     // cmp    $0xffffffffffffffff,%ecx
 1956     __ cmpl($div$$Register, -1);
 1957 
 1958     // je     11 <done>
 1959     __ jccb(Assembler::equal, done);
 1960 
 1961     // <normal>
 1962     // cltd
 1963     __ bind(normal);
 1964     __ cdql();
 1965 
 1966     // idivl
 1967     // <done>
 1968     __ idivl($div$$Register);
 1969     __ bind(done);
 1970   %}
 1971 
 1972   enc_class cdqq_enc(no_rax_rdx_RegL div)
 1973   %{
 1974     // Full implementation of Java ldiv and lrem; checks for
 1975     // special case as described in JVM spec., p.243 & p.271.
 1976     //
 1977     //         normal case                           special case
 1978     //
 1979     // input : rax: dividend                         min_long
 1980     //         reg: divisor                          -1
 1981     //
 1982     // output: rax: quotient  (= rax idiv reg)       min_long
 1983     //         rdx: remainder (= rax irem reg)       0
 1984     //
 1985     //  Code sequnce:
 1986     //
 1987     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 1988     //    7:   00 00 80
 1989     //    a:   48 39 d0                cmp    %rdx,%rax
 1990     //    d:   75 08                   jne    17 <normal>
 1991     //    f:   33 d2                   xor    %edx,%edx
 1992     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 1993     //   15:   74 05                   je     1c <done>
 1994     // 0000000000000017 <normal>:
 1995     //   17:   48 99                   cqto
 1996     //   19:   48 f7 f9                idiv   $div
 1997     // 000000000000001c <done>:
 1998     MacroAssembler _masm(&cbuf);
 1999     Label normal;
 2000     Label done;
 2001 
 2002     // mov    $0x8000000000000000,%rdx
 2003     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 2004 
 2005     // cmp    %rdx,%rax
 2006     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 2007 
 2008     // jne    17 <normal>
 2009     __ jccb(Assembler::notEqual, normal);
 2010 
 2011     // xor    %edx,%edx
 2012     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 2013 
 2014     // cmp    $0xffffffffffffffff,$div
 2015     __ cmpq($div$$Register, -1);
 2016 
 2017     // je     1e <done>
 2018     __ jccb(Assembler::equal, done);
 2019 
 2020     // <normal>
 2021     // cqto
 2022     __ bind(normal);
 2023     __ cdqq();
 2024 
 2025     // idivq (note: must be emitted by the user of this rule)
 2026     // <done>
 2027     __ idivq($div$$Register);
 2028     __ bind(done);
 2029   %}
 2030 
 2031   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
 2032   enc_class OpcSE(immI imm)
 2033   %{
 2034     // Emit primary opcode and set sign-extend bit
 2035     // Check for 8-bit immediate, and set sign extend bit in opcode
 2036     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2037       emit_opcode(cbuf, $primary | 0x02);
 2038     } else {
 2039       // 32-bit immediate
 2040       emit_opcode(cbuf, $primary);
 2041     }
 2042   %}
 2043 
 2044   enc_class OpcSErm(rRegI dst, immI imm)
 2045   %{
 2046     // OpcSEr/m
 2047     int dstenc = $dst$$reg;
 2048     if (dstenc >= 8) {
 2049       emit_opcode(cbuf, Assembler::REX_B);
 2050       dstenc -= 8;
 2051     }
 2052     // Emit primary opcode and set sign-extend bit
 2053     // Check for 8-bit immediate, and set sign extend bit in opcode
 2054     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2055       emit_opcode(cbuf, $primary | 0x02);
 2056     } else {
 2057       // 32-bit immediate
 2058       emit_opcode(cbuf, $primary);
 2059     }
 2060     // Emit r/m byte with secondary opcode, after primary opcode.
 2061     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2062   %}
 2063 
 2064   enc_class OpcSErm_wide(rRegL dst, immI imm)
 2065   %{
 2066     // OpcSEr/m
 2067     int dstenc = $dst$$reg;
 2068     if (dstenc < 8) {
 2069       emit_opcode(cbuf, Assembler::REX_W);
 2070     } else {
 2071       emit_opcode(cbuf, Assembler::REX_WB);
 2072       dstenc -= 8;
 2073     }
 2074     // Emit primary opcode and set sign-extend bit
 2075     // Check for 8-bit immediate, and set sign extend bit in opcode
 2076     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2077       emit_opcode(cbuf, $primary | 0x02);
 2078     } else {
 2079       // 32-bit immediate
 2080       emit_opcode(cbuf, $primary);
 2081     }
 2082     // Emit r/m byte with secondary opcode, after primary opcode.
 2083     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2084   %}
 2085 
 2086   enc_class Con8or32(immI imm)
 2087   %{
 2088     // Check for 8-bit immediate, and set sign extend bit in opcode
 2089     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2090       $$$emit8$imm$$constant;
 2091     } else {
 2092       // 32-bit immediate
 2093       $$$emit32$imm$$constant;
 2094     }
 2095   %}
 2096 
 2097   enc_class opc2_reg(rRegI dst)
 2098   %{
 2099     // BSWAP
 2100     emit_cc(cbuf, $secondary, $dst$$reg);
 2101   %}
 2102 
 2103   enc_class opc3_reg(rRegI dst)
 2104   %{
 2105     // BSWAP
 2106     emit_cc(cbuf, $tertiary, $dst$$reg);
 2107   %}
 2108 
 2109   enc_class reg_opc(rRegI div)
 2110   %{
 2111     // INC, DEC, IDIV, IMOD, JMP indirect, ...
 2112     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
 2113   %}
 2114 
  // Conditional move opcode bytes: emits the primary opcode as one byte,
  // then hands the secondary opcode and the condition code of 'cop' to
  // emit_cc.
  enc_class enc_cmov(cmpOp cop)
  %{
    // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}
 2121 
 2122   enc_class enc_PartialSubtypeCheck()
 2123   %{
 2124     Register Rrdi = as_Register(RDI_enc); // result register
 2125     Register Rrax = as_Register(RAX_enc); // super class
 2126     Register Rrcx = as_Register(RCX_enc); // killed
 2127     Register Rrsi = as_Register(RSI_enc); // sub class
 2128     Label miss;
 2129     const bool set_cond_codes = true;
 2130 
 2131     MacroAssembler _masm(&cbuf);
 2132     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 2133                                      NULL, &miss,
 2134                                      /*set_cond_codes:*/ true);
 2135     if ($primary) {
 2136       __ xorptr(Rrdi, Rrdi);
 2137     }
 2138     __ bind(miss);
 2139   %}
 2140 
 2141   enc_class clear_avx %{
 2142     debug_only(int off0 = cbuf.insts_size());
 2143     if (generate_vzeroupper(Compile::current())) {
 2144       // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
 2145       // Clear upper bits of YMM registers when current compiled code uses
 2146       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 2147       MacroAssembler _masm(&cbuf);
 2148       __ vzeroupper();
 2149     }
 2150     debug_only(int off1 = cbuf.insts_size());
 2151     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 2152   %}
 2153 
 2154   enc_class Java_To_Runtime(method meth) %{
 2155     // No relocation needed
 2156     MacroAssembler _masm(&cbuf);
 2157     __ mov64(r10, (int64_t) $meth$$method);
 2158     __ call(r10);
 2159     __ post_call_nop();
 2160   %}
 2161 
  // Encodes a static (or optimized virtual) Java call.  Three cases:
  //   - no _method: primary opcode plus a 32-bit displacement carrying a
  //     runtime call relocation;
  //   - the _ensureMaterializedForStackWalk intrinsic: the call is replaced
  //     by a 5-byte nop;
  //   - otherwise: primary opcode plus a 32-bit displacement carrying a
  //     static or optimized virtual call relocation, followed by the
  //     to-interpreter stub (shared or emitted here).
  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();

    if (!_method) {
      $$$emit8$primary;
      // Displacement is measured from the end of the 4-byte field that is
      // about to be emitted, hence the "- 4".
      emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_DISP32);
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      $$$emit8$primary;
      int method_index = resolved_method_index(cbuf);
      // The relocation kind follows the _optimized_virtual flag.
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      address mark = cbuf.insts_mark();
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
      } else {
        // Emit stubs for static call.
        address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
        if (stub == NULL) {
          // Stub emission failed: record the failure and stop emitting.
          // Note that this skips the clear_inst_mark/post_call_nop below.
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
    _masm.clear_inst_mark();
    __ post_call_nop();
  %}
 2204 
 2205   enc_class Java_Dynamic_Call(method meth) %{
 2206     MacroAssembler _masm(&cbuf);
 2207     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 2208     __ post_call_nop();
 2209   %}
 2210 
 2211   enc_class reg_opc_imm(rRegI dst, immI8 shift)
 2212   %{
 2213     // SAL, SAR, SHR
 2214     int dstenc = $dst$$reg;
 2215     if (dstenc >= 8) {
 2216       emit_opcode(cbuf, Assembler::REX_B);
 2217       dstenc -= 8;
 2218     }
 2219     $$$emit8$primary;
 2220     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2221     $$$emit8$shift$$constant;
 2222   %}
 2223 
 2224   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
 2225   %{
 2226     // SAL, SAR, SHR
 2227     int dstenc = $dst$$reg;
 2228     if (dstenc < 8) {
 2229       emit_opcode(cbuf, Assembler::REX_W);
 2230     } else {
 2231       emit_opcode(cbuf, Assembler::REX_WB);
 2232       dstenc -= 8;
 2233     }
 2234     $$$emit8$primary;
 2235     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2236     $$$emit8$shift$$constant;
 2237   %}
 2238 
 2239   enc_class load_immI(rRegI dst, immI src)
 2240   %{
 2241     int dstenc = $dst$$reg;
 2242     if (dstenc >= 8) {
 2243       emit_opcode(cbuf, Assembler::REX_B);
 2244       dstenc -= 8;
 2245     }
 2246     emit_opcode(cbuf, 0xB8 | dstenc);
 2247     $$$emit32$src$$constant;
 2248   %}
 2249 
 2250   enc_class load_immL(rRegL dst, immL src)
 2251   %{
 2252     int dstenc = $dst$$reg;
 2253     if (dstenc < 8) {
 2254       emit_opcode(cbuf, Assembler::REX_W);
 2255     } else {
 2256       emit_opcode(cbuf, Assembler::REX_WB);
 2257       dstenc -= 8;
 2258     }
 2259     emit_opcode(cbuf, 0xB8 | dstenc);
 2260     emit_d64(cbuf, $src$$constant);
 2261   %}
 2262 
 2263   enc_class load_immUL32(rRegL dst, immUL32 src)
 2264   %{
 2265     // same as load_immI, but this time we care about zeroes in the high word
 2266     int dstenc = $dst$$reg;
 2267     if (dstenc >= 8) {
 2268       emit_opcode(cbuf, Assembler::REX_B);
 2269       dstenc -= 8;
 2270     }
 2271     emit_opcode(cbuf, 0xB8 | dstenc);
 2272     $$$emit32$src$$constant;
 2273   %}
 2274 
 2275   enc_class load_immL32(rRegL dst, immL32 src)
 2276   %{
 2277     int dstenc = $dst$$reg;
 2278     if (dstenc < 8) {
 2279       emit_opcode(cbuf, Assembler::REX_W);
 2280     } else {
 2281       emit_opcode(cbuf, Assembler::REX_WB);
 2282       dstenc -= 8;
 2283     }
 2284     emit_opcode(cbuf, 0xC7);
 2285     emit_rm(cbuf, 0x03, 0x00, dstenc);
 2286     $$$emit32$src$$constant;
 2287   %}
 2288 
 2289   enc_class load_immP31(rRegP dst, immP32 src)
 2290   %{
 2291     // same as load_immI, but this time we care about zeroes in the high word
 2292     int dstenc = $dst$$reg;
 2293     if (dstenc >= 8) {
 2294       emit_opcode(cbuf, Assembler::REX_B);
 2295       dstenc -= 8;
 2296     }
 2297     emit_opcode(cbuf, 0xB8 | dstenc);
 2298     $$$emit32$src$$constant;
 2299   %}
 2300 
 2301   enc_class load_immP(rRegP dst, immP src)
 2302   %{
 2303     int dstenc = $dst$$reg;
 2304     if (dstenc < 8) {
 2305       emit_opcode(cbuf, Assembler::REX_W);
 2306     } else {
 2307       emit_opcode(cbuf, Assembler::REX_WB);
 2308       dstenc -= 8;
 2309     }
 2310     emit_opcode(cbuf, 0xB8 | dstenc);
 2311     // This next line should be generated from ADLC
 2312     if ($src->constant_reloc() != relocInfo::none) {
 2313       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
 2314     } else {
 2315       emit_d64(cbuf, $src$$constant);
 2316     }
 2317   %}
 2318 
  // Emits the constant operand as a 32-bit immediate using the ADLC
  // emit32 replacement form.
  enc_class Con32(immI src)
  %{
    // Output immediate
    $$$emit32$src$$constant;
  %}
 2324 
 2325   enc_class Con32F_as_bits(immF src)
 2326   %{
 2327     // Output Float immediate bits
 2328     jfloat jf = $src$$constant;
 2329     jint jf_as_bits = jint_cast(jf);
 2330     emit_d32(cbuf, jf_as_bits);
 2331   %}
 2332 
  // Emits the constant operand as a 16-bit immediate using the ADLC
  // emit16 replacement form.
  enc_class Con16(immI src)
  %{
    // Output immediate
    $$$emit16$src$$constant;
  %}
 2338 
 2339   // How is this different from Con32??? XXX
 2340   enc_class Con_d32(immI src)
 2341   %{
 2342     emit_d32(cbuf,$src$$constant);
 2343   %}
 2344 
 2345   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
 2346     // Output immediate memory reference
 2347     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2348     emit_d32(cbuf, 0x00);
 2349   %}
 2350 
 2351   enc_class lock_prefix()
 2352   %{
 2353     emit_opcode(cbuf, 0xF0); // lock
 2354   %}
 2355 
 2356   enc_class REX_mem(memory mem)
 2357   %{
 2358     if ($mem$$base >= 8) {
 2359       if ($mem$$index < 8) {
 2360         emit_opcode(cbuf, Assembler::REX_B);
 2361       } else {
 2362         emit_opcode(cbuf, Assembler::REX_XB);
 2363       }
 2364     } else {
 2365       if ($mem$$index >= 8) {
 2366         emit_opcode(cbuf, Assembler::REX_X);
 2367       }
 2368     }
 2369   %}
 2370 
 2371   enc_class REX_mem_wide(memory mem)
 2372   %{
 2373     if ($mem$$base >= 8) {
 2374       if ($mem$$index < 8) {
 2375         emit_opcode(cbuf, Assembler::REX_WB);
 2376       } else {
 2377         emit_opcode(cbuf, Assembler::REX_WXB);
 2378       }
 2379     } else {
 2380       if ($mem$$index < 8) {
 2381         emit_opcode(cbuf, Assembler::REX_W);
 2382       } else {
 2383         emit_opcode(cbuf, Assembler::REX_WX);
 2384       }
 2385     }
 2386   %}
 2387 
 2388   // for byte regs
 2389   enc_class REX_breg(rRegI reg)
 2390   %{
 2391     if ($reg$$reg >= 4) {
 2392       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2393     }
 2394   %}
 2395 
 2396   // for byte regs
 2397   enc_class REX_reg_breg(rRegI dst, rRegI src)
 2398   %{
 2399     if ($dst$$reg < 8) {
 2400       if ($src$$reg >= 4) {
 2401         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2402       }
 2403     } else {
 2404       if ($src$$reg < 8) {
 2405         emit_opcode(cbuf, Assembler::REX_R);
 2406       } else {
 2407         emit_opcode(cbuf, Assembler::REX_RB);
 2408       }
 2409     }
 2410   %}
 2411 
 2412   // for byte regs
 2413   enc_class REX_breg_mem(rRegI reg, memory mem)
 2414   %{
 2415     if ($reg$$reg < 8) {
 2416       if ($mem$$base < 8) {
 2417         if ($mem$$index >= 8) {
 2418           emit_opcode(cbuf, Assembler::REX_X);
 2419         } else if ($reg$$reg >= 4) {
 2420           emit_opcode(cbuf, Assembler::REX);
 2421         }
 2422       } else {
 2423         if ($mem$$index < 8) {
 2424           emit_opcode(cbuf, Assembler::REX_B);
 2425         } else {
 2426           emit_opcode(cbuf, Assembler::REX_XB);
 2427         }
 2428       }
 2429     } else {
 2430       if ($mem$$base < 8) {
 2431         if ($mem$$index < 8) {
 2432           emit_opcode(cbuf, Assembler::REX_R);
 2433         } else {
 2434           emit_opcode(cbuf, Assembler::REX_RX);
 2435         }
 2436       } else {
 2437         if ($mem$$index < 8) {
 2438           emit_opcode(cbuf, Assembler::REX_RB);
 2439         } else {
 2440           emit_opcode(cbuf, Assembler::REX_RXB);
 2441         }
 2442       }
 2443     }
 2444   %}
 2445 
 2446   enc_class REX_reg(rRegI reg)
 2447   %{
 2448     if ($reg$$reg >= 8) {
 2449       emit_opcode(cbuf, Assembler::REX_B);
 2450     }
 2451   %}
 2452 
 2453   enc_class REX_reg_wide(rRegI reg)
 2454   %{
 2455     if ($reg$$reg < 8) {
 2456       emit_opcode(cbuf, Assembler::REX_W);
 2457     } else {
 2458       emit_opcode(cbuf, Assembler::REX_WB);
 2459     }
 2460   %}
 2461 
 2462   enc_class REX_reg_reg(rRegI dst, rRegI src)
 2463   %{
 2464     if ($dst$$reg < 8) {
 2465       if ($src$$reg >= 8) {
 2466         emit_opcode(cbuf, Assembler::REX_B);
 2467       }
 2468     } else {
 2469       if ($src$$reg < 8) {
 2470         emit_opcode(cbuf, Assembler::REX_R);
 2471       } else {
 2472         emit_opcode(cbuf, Assembler::REX_RB);
 2473       }
 2474     }
 2475   %}
 2476 
 2477   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
 2478   %{
 2479     if ($dst$$reg < 8) {
 2480       if ($src$$reg < 8) {
 2481         emit_opcode(cbuf, Assembler::REX_W);
 2482       } else {
 2483         emit_opcode(cbuf, Assembler::REX_WB);
 2484       }
 2485     } else {
 2486       if ($src$$reg < 8) {
 2487         emit_opcode(cbuf, Assembler::REX_WR);
 2488       } else {
 2489         emit_opcode(cbuf, Assembler::REX_WRB);
 2490       }
 2491     }
 2492   %}
 2493 
 2494   enc_class REX_reg_mem(rRegI reg, memory mem)
 2495   %{
 2496     // REX prefix for a reg,mem form.  Each operand whose encoding is 8 or
 2497     // more contributes one letter to the prefix name: R for the register,
 2498     // X for the index and B for the base.  When none of the three is 8 or
 2499     // more, no prefix byte is emitted.
 2500     const bool reg_high   = $reg$$reg   >= 8;
 2501     const bool base_high  = $mem$$base  >= 8;
 2502     const bool index_high = $mem$$index >= 8;
 2503 
 2504     // Pack the three tests into one selector: R = 4, X = 2, B = 1.
 2505     switch ((reg_high ? 4 : 0) | (index_high ? 2 : 0) | (base_high ? 1 : 0)) {
 2506       case 1:
 2507         emit_opcode(cbuf, Assembler::REX_B);   break;
 2508       case 2:
 2509         emit_opcode(cbuf, Assembler::REX_X);   break;
 2510       case 3:
 2511         emit_opcode(cbuf, Assembler::REX_XB);  break;
 2512       case 4:
 2513         emit_opcode(cbuf, Assembler::REX_R);   break;
 2514       case 5:
 2515         emit_opcode(cbuf, Assembler::REX_RB);  break;
 2516       case 6:
 2517         emit_opcode(cbuf, Assembler::REX_RX);  break;
 2518       case 7:
 2519         emit_opcode(cbuf, Assembler::REX_RXB); break;
 2520       default:
 2521         break;  // selector 0: nothing to emit
 2522     }
 2523   %}
 2524 
 2525   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
 2526   %{
 2527     // REX.W-family prefix for a reg,mem form; unlike REX_reg_mem, a prefix
 2528     // byte is emitted on every path.  Each operand whose encoding is 8 or
 2529     // more adds one letter to the prefix name: R for the register, X for
 2530     // the index and B for the base.
 2531     const bool reg_high   = $reg$$reg   >= 8;
 2532     const bool base_high  = $mem$$base  >= 8;
 2533     const bool index_high = $mem$$index >= 8;
 2534 
 2535     // Pack the three tests into one selector: R = 4, X = 2, B = 1.
 2536     switch ((reg_high ? 4 : 0) | (index_high ? 2 : 0) | (base_high ? 1 : 0)) {
 2537       case 0:
 2538         emit_opcode(cbuf, Assembler::REX_W);    break;
 2539       case 1:
 2540         emit_opcode(cbuf, Assembler::REX_WB);   break;
 2541       case 2:
 2542         emit_opcode(cbuf, Assembler::REX_WX);   break;
 2543       case 3:
 2544         emit_opcode(cbuf, Assembler::REX_WXB);  break;
 2545       case 4:
 2546         emit_opcode(cbuf, Assembler::REX_WR);   break;
 2547       case 5:
 2548         emit_opcode(cbuf, Assembler::REX_WRB);  break;
 2549       case 6:
 2550         emit_opcode(cbuf, Assembler::REX_WRX);  break;
 2551       case 7:
 2552         emit_opcode(cbuf, Assembler::REX_WRXB); break;
 2553       default:
 2554         break;  // not reached: the selector is always in 0..7
 2555     }
 2556   %}
 2557 
 2558   enc_class reg_mem(rRegI ereg, memory mem)
 2559   %{
 2560     // Pass the register encoding and the addressing components of mem to
 2561     // encode_RegMem, which per the original note handles high registers.
 2562     const int reg_enc   = $ereg$$reg;
 2563     const int base_enc  = $mem$$base;
 2564     const int index_enc = $mem$$index;
 2565     const int scale_val = $mem$$scale;
 2566     const int disp_val  = $mem$$disp;
 2567     const relocInfo::relocType reloc = $mem->disp_reloc();
 2568     encode_RegMem(cbuf, reg_enc, base_enc, index_enc, scale_val, disp_val, reloc);
 2569   %}
 2570 
 2571   enc_class RM_opc_mem(immI rm_opcode, memory mem)
 2572   %{
 2573     // The constant operand rm_opcode is passed to encode_RegMem in the
 2574     // position that reg_mem uses for a register encoding; the remaining
 2575     // arguments are the addressing components of the memory operand.
 2576     // Per the original note, high registers are handled in encode_RegMem.
 2577     const int opc_ext   = $rm_opcode$$constant;
 2578     const int base_enc  = $mem$$base;
 2579     const int index_enc = $mem$$index;
 2580     const int scale_val = $mem$$scale;
 2581     const int disp_val  = $mem$$disp;
 2582     // Relocation type of the displacement (the original comment mentions
 2583     // disp-as-oop when working with static globals).
 2584     const relocInfo::relocType reloc = $mem->disp_reloc();
 2585     encode_RegMem(cbuf, opc_ext, base_enc, index_enc, scale_val, disp_val, reloc);
 2586   %}
 2587 
 2588   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
 2589   %{
 2590     // Encode dst with the address [src0 + src1]: src0 is the base, src1 the
 2591     // displacement; there is no index register, no scaling, no relocation.
 2592     const int no_index = 0x04;  // 0x04 indicates no index
 2593     const int no_scale = 0x00;  // 0x00 indicates no scale
 2594     const int dst_enc  = $dst$$reg;
 2595     const int base_enc = $src0$$reg;      // 0xFFFFFFFF would indicate no base
 2596     const int disp_val = $src1$$constant; // 0x00 would indicate no displacement
 2597     encode_RegMem(cbuf, dst_enc, base_enc, no_index, no_scale, disp_val, relocInfo::none);
 2598   %}
 2599 
 2600   enc_class neg_reg(rRegI dst)
 2601   %{
 2602     // NEG dst (opcode 0xF7).  An encoding of 8 or more gets a REX_B prefix
 2603     // and is reduced by 8 before being passed to emit_rm.
 2604     const int full_enc = $dst$$reg;
 2605     if (full_enc >= 8) {
 2606       emit_opcode(cbuf, Assembler::REX_B);
 2607     }
 2608     emit_opcode(cbuf, 0xF7);
 2609     emit_rm(cbuf, 0x3, 0x03, full_enc >= 8 ? full_enc - 8 : full_enc);
 2610   %}
 2611 
 2612   enc_class neg_reg_wide(rRegI dst)
 2613   %{
 2614     // NEG dst with a REX.W-family prefix, which is always emitted: REX_W
 2615     // when the encoding is below 8, REX_WB otherwise.  Encodings of 8 or
 2616     // more are reduced by 8 before being passed to emit_rm.
 2617     const int full_enc = $dst$$reg;
 2618     const bool high = full_enc >= 8;
 2619     emit_opcode(cbuf, high ? Assembler::REX_WB : Assembler::REX_W);
 2620 
 2621     // Opcode 0xF7 followed by emit_rm(0x3, 0x03, reduced encoding).
 2622     emit_opcode(cbuf, 0xF7);
 2623     emit_rm(cbuf, 0x3, 0x03, high ? full_enc - 8 : full_enc);
 2624   %}
 2625 
 2626   enc_class setLT_reg(rRegI dst)
 2627   %{
 2628     // SETLT dst (0x0F 0x9C).  Encodings 4..7 get a plain REX prefix and
 2629     // encodings of 8 or more get REX_B and are reduced by 8; encodings
 2630     // below 4 get no prefix.
 2631     const int full_enc = $dst$$reg;
 2632     if (full_enc >= 4) {
 2633       emit_opcode(cbuf, full_enc >= 8 ? Assembler::REX_B : Assembler::REX);
 2634     }
 2635     emit_opcode(cbuf, 0x0F);
 2636     emit_opcode(cbuf, 0x9C);
 2637     const int rm_enc = (full_enc >= 8) ? full_enc - 8 : full_enc;
 2638     emit_rm(cbuf, 0x3, 0x0, rm_enc);
 2639   %}
 2640 
 2641   enc_class setNZ_reg(rRegI dst)
 2642   %{
 2643     // SETNZ dst (0x0F 0x95).  No prefix for encodings below 4, a plain REX
 2644     // for 4..7, and REX_B (with the encoding reduced by 8) from 8 upwards.
 2645     const int full_enc = $dst$$reg;
 2646     if (full_enc >= 8) {
 2647       emit_opcode(cbuf, Assembler::REX_B);
 2648     } else if (full_enc >= 4) {
 2649       emit_opcode(cbuf, Assembler::REX);
 2650     }
 2651     emit_opcode(cbuf, 0x0F);
 2652     emit_opcode(cbuf, 0x95);
 2653     emit_rm(cbuf, 0x3, 0x0, full_enc >= 8 ? full_enc - 8 : full_enc);
 2654   %}
 2655 
 2656 
 2657   // Compare two longs and materialize the three-way result in dst:
 2658   // -1 if src1 is less than src2, 0 if they are equal, 1 otherwise.
 2659   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
 2660   %{
 2661     const int lhs_enc = $src1$$reg;
 2662     const int rhs_enc = $src2$$reg;
 2663     const int dst_enc = $dst$$reg;
 2664     const bool lhs_high = lhs_enc >= 8;
 2665     const bool rhs_high = rhs_enc >= 8;
 2666     const bool dst_high = dst_enc >= 8;
 2667     // setne and movzbl below take a prefix byte for dst encodings 4 and up.
 2668     const bool dst_needs_rex = dst_enc >= 4;
 2669 
 2670     // cmpq src1, src2: REX.W-family prefix, opcode 0x3B, then emit_rm.
 2671     if (lhs_high) {
 2672       emit_opcode(cbuf, rhs_high ? Assembler::REX_WRB : Assembler::REX_WR);
 2673     } else {
 2674       emit_opcode(cbuf, rhs_high ? Assembler::REX_WB : Assembler::REX_W);
 2675     }
 2676     emit_opcode(cbuf, 0x3B);
 2677     emit_rm(cbuf, 0x3, lhs_enc & 7, rhs_enc & 7);
 2678 
 2679     // movl dst, -1: the value kept when the jl below is taken.
 2680     if (dst_high) {
 2681       emit_opcode(cbuf, Assembler::REX_B);
 2682     }
 2683     emit_opcode(cbuf, 0xB8 | (dst_enc & 7));
 2684     emit_d32(cbuf, -1);
 2685 
 2686     // jl,s done: skips the setne/movzbl pair.  Each of the two is emitted by
 2687     // three calls below, plus one prefix call apiece when dst_needs_rex;
 2688     // assuming one byte per call, that gives the displacement of 6 or 8.
 2689     emit_opcode(cbuf, 0x7C);
 2690     emit_d8(cbuf, dst_needs_rex ? 0x08 : 0x06);
 2691 
 2692     // setne dst
 2693     if (dst_needs_rex) {
 2694       emit_opcode(cbuf, dst_high ? Assembler::REX_B : Assembler::REX);
 2695     }
 2696     emit_opcode(cbuf, 0x0F);
 2697     emit_opcode(cbuf, 0x95);
 2698     emit_opcode(cbuf, 0xC0 | (dst_enc & 7));
 2699 
 2700     // movzbl dst, dst
 2701     if (dst_needs_rex) {
 2702       emit_opcode(cbuf, dst_high ? Assembler::REX_RB : Assembler::REX);
 2703     }
 2704     emit_opcode(cbuf, 0x0F);
 2705     emit_opcode(cbuf, 0xB6);
 2706     emit_rm(cbuf, 0x3, dst_enc & 7, dst_enc & 7);
 2707   %}
 2708 
 2709   enc_class Push_ResultXD(regD dst) %{  // Moves a double into the XMM register of dst by way of the stack slot at [rsp].
 2710     MacroAssembler _masm(&cbuf);  // assembler that appends to cbuf
 2711     __ fstp_d(Address(rsp, 0));  // fstp_d to [rsp] (an x87 store-and-pop, going by the mnemonic)
 2712     __ movdbl($dst$$XMMRegister, Address(rsp, 0));  // reload the slot at [rsp] into the XMM register
 2713     __ addptr(rsp, 8);  // add 8 to rsp; NOTE(review): the slot is presumably reserved by an earlier encoding such as Push_SrcXD - confirm
 2714   %}
 2715 
 2716   enc_class Push_SrcXD(regD src) %{  // Spills the XMM register of src to a fresh 8-byte stack slot and fld_d loads it from there.
 2717     MacroAssembler _masm(&cbuf);  // assembler that appends to cbuf
 2718     __ subptr(rsp, 8);  // subtract 8 from rsp to make room for one double
 2719     __ movdbl(Address(rsp, 0), $src$$XMMRegister);  // store the XMM register to [rsp]
 2720     __ fld_d(Address(rsp, 0));  // fld_d from [rsp] (an x87 load, going by the mnemonic); rsp is not restored here
 2721   %}
 2722 
 2723 
 2724   enc_class enc_rethrow()
 2725   %{
 2726     // jmp (opcode 0xE9) to the rethrow stub, with a runtime-call relocation.
 2727     cbuf.set_insts_mark();
 2728     emit_opcode(cbuf, 0xE9);
 2729     // Displacement is taken from insts_end plus 4, read after the opcode.
 2730     const int rel32 = (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4);
 2731     emit_d32_reloc(cbuf, rel32, runtime_call_Relocation::spec(), RELOC_DISP32);
 2732   %}
 2733 
 2734 %}
 2735 
 2736 
 2737 
 2738 //----------FRAME--------------------------------------------------------------
 2739 // Definition of frame structure and management information.
 2740 //
 2741 //  S T A C K   L A Y O U T    Allocators stack-slot number
 2742 //                             |   (to get allocators register number
 2743 //  G  Owned by    |        |  v    add OptoReg::stack0())
 2744 //  r   CALLER     |        |
 2745 //  o     |        +--------+      pad to even-align allocators stack-slot
 2746 //  w     V        |  pad0  |        numbers; owned by CALLER
 2747 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 2748 //  h     ^        |   in   |  5
 2749 //        |        |  args  |  4   Holes in incoming args owned by SELF
 2750 //  |     |        |        |  3
 2751 //  |     |        +--------+
 2752 //  V     |        | old out|      Empty on Intel, window on Sparc
 2753 //        |    old |preserve|      Must be even aligned.
 2754 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 2755 //        |        |   in   |  3   area for Intel ret address
 2756 //     Owned by    |preserve|      Empty on Sparc.
 2757 //       SELF      +--------+
 2758 //        |        |  pad2  |  2   pad to align old SP
 2759 //        |        +--------+  1
 2760 //        |        | locks  |  0
 2761 //        |        +--------+----> OptoReg::stack0(), even aligned
 2762 //        |        |  pad1  | 11   pad to align new SP
 2763 //        |        +--------+
 2764 //        |        |        | 10
 2765 //        |        | spills |  9   spills
 2766 //        V        |        |  8   (pad0 slot for callee)
 2767 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 2768 //        ^        |  out   |  7
 2769 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 2770 //     Owned by    +--------+
 2771 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 2772 //        |    new |preserve|      Must be even-aligned.
 2773 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 2774 //        |        |        |
 2775 //
 2776 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 2777 //         known from SELF's arguments and the Java calling convention.
 2778 //         Region 6-7 is determined per call site.
 2779 // Note 2: If the calling convention leaves holes in the incoming argument
 2780 //         area, those holes are owned by SELF.  Holes in the outgoing area
 2781 //         are owned by the CALLEE.  Holes should not be necessary in the
 2782 //         incoming area, as the Java calling convention is completely under
 2783 //         the control of the AD file.  Doubles can be sorted and packed to
 2784 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 2785 //         varargs C calling conventions.
 2786 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 2787 //         even aligned with pad0 as needed.
 2788 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 2789 //         region 6-11 is even aligned; it may be padded out more so that
 2790 //         the region from SP to FP meets the minimum stack alignment.
 2791 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 2792 //         alignment.  Region 11, pad1, may be dynamically extended so that
 2793 //         SP meets the minimum alignment.
 2794 
 2795 frame
 2796 %{
 2797   // Part of the calling convention between compiled code and the
 2798   // interpreter; only the inline cache register is declared here.
 2799   inline_cache_reg(RAX);                // Inline Cache Register
 2800 
 2801   // Optional: name the operand used by cisc-spilling to access
 2802   // [stack_pointer + offset]
 2803   cisc_spilling_operand_name(indOffset32);
 2804 
 2805   // Number of stack slots consumed by locking an object
 2806   sync_stack_slots(2);
 2807 
 2808   // Compiled code's Frame Pointer
 2809   frame_pointer(RSP);
 2810 
 2811   // Interpreter stores its frame pointer in a register which is
 2812   // stored to the stack by I2CAdaptors.
 2813   // I2CAdaptors convert from interpreted java to compiled java.
 2814   interpreter_frame_pointer(RBP);
 2815 
 2816   // Stack alignment requirement
 2817   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 2818 
 2819   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 2820   // for calls to C.  Supports the var-args backing area for register parms.
 2821   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 2822 
 2823   // The after-PROLOG location of the return address.  Location of
 2824   // return address specifies a type (REG or STACK) and a number
 2825   // representing the register number (i.e. - use a register name) or
 2826   // stack slot.
 2827   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 2828   // Otherwise, it is above the locks and verification slot and alignment word
 2829   return_addr(STACK - 2 +
 2830               align_up((Compile::current()->in_preserve_stack_slots() +
 2831                         Compile::current()->fixed_slots()),
 2832                        stack_alignment_in_slots()));
 2833 
 2834   // Location of compiled Java return values.  Same as C for now.
 2835   return_value
 2836   %{
 2837     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 2838            "only return normal values");
 2839 
 2840     static const int lo[Op_RegL + 1] = {  // lo component of the returned pair, indexed by ideal_reg
 2841       0,
 2842       0,
 2843       RAX_num,  // Op_RegN
 2844       RAX_num,  // Op_RegI
 2845       RAX_num,  // Op_RegP
 2846       XMM0_num, // Op_RegF
 2847       XMM0_num, // Op_RegD
 2848       RAX_num   // Op_RegL
 2849     };
 2850     static const int hi[Op_RegL + 1] = {  // hi component of the returned pair, indexed by ideal_reg
 2851       0,
 2852       0,
 2853       OptoReg::Bad, // Op_RegN
 2854       OptoReg::Bad, // Op_RegI
 2855       RAX_H_num,    // Op_RegP
 2856       OptoReg::Bad, // Op_RegF
 2857       XMM0b_num,    // Op_RegD
 2858       RAX_H_num     // Op_RegL
 2859     };
 2860     // Excluded flags and vector registers.
 2861     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 2862     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);  // pair is built as (hi, lo)
 2863   %}
 2864 %}
 2865 
 2866 //----------ATTRIBUTES---------------------------------------------------------
 2867 //----------Operand Attributes-------------------------------------------------
 2868 op_attrib op_cost(0);        // Required cost attribute; operand definitions set it with op_cost(n)
 2869 
 2870 //----------Instruction Attributes---------------------------------------------
 2871 ins_attrib ins_cost(100);       // Required cost attribute
 2872 ins_attrib ins_size(8);         // Required size attribute (in bits)
 2873 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 2874                                 // a non-matching short branch variant
 2875                                 // of some long branch?
 2876 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 2877                                 // be a power of 2); specifies the
 2878                                 // alignment that some part of the
 2879                                 // instruction (not necessarily the
 2880                                 // start) requires.  If > 1, a
 2881                                 // compute_padding() function must be
 2882                                 // provided for the instruction
 2883 
 2884 //----------OPERANDS-----------------------------------------------------------
 2885 // Operand definitions must precede instruction definitions for correct parsing
 2886 // in the ADLC because operands constitute user defined types which are used in
 2887 // instruction definitions.
 2888 
 2889 //----------Simple Operands----------------------------------------------------
 2890 // Immediate Operands
 2891 // Integer Immediate: matches any ConI node (no predicate), at op_cost 10.
 2892 operand immI()
 2893 %{
 2894   match(ConI);
 2895 
 2896   op_cost(10);
 2897   format %{ %}
 2898   interface(CONST_INTER);
 2899 %}
 2900 
 2901 // Constant for test vs zero: matches a ConI whose value is 0, at op_cost 0.
 2902 operand immI_0()
 2903 %{
 2904   predicate(n->get_int() == 0);
 2905   match(ConI);
 2906 
 2907   op_cost(0);
 2908   format %{ %}
 2909   interface(CONST_INTER);
 2910 %}
 2911 
 2912 // Constant for increment: matches a ConI whose value is 1, at op_cost 0.
 2913 operand immI_1()
 2914 %{
 2915   predicate(n->get_int() == 1);
 2916   match(ConI);
 2917 
 2918   op_cost(0);
 2919   format %{ %}
 2920   interface(CONST_INTER);
 2921 %}
 2922 
 2923 // Constant for decrement: matches a ConI whose value is -1, at op_cost 0.
 2924 operand immI_M1()
 2925 %{
 2926   predicate(n->get_int() == -1);
 2927   match(ConI);
 2928 
 2929   op_cost(0);
 2930   format %{ %}
 2931   interface(CONST_INTER);
 2932 %}
 2933 
 2934 operand immI_2()  // Matches a ConI whose value is 2, at op_cost 0.
 2935 %{
 2936   predicate(n->get_int() == 2);
 2937   match(ConI);
 2938 
 2939   op_cost(0);
 2940   format %{ %}
 2941   interface(CONST_INTER);
 2942 %}
 2943 
 2944 operand immI_4()  // Matches a ConI whose value is 4, at op_cost 0.
 2945 %{
 2946   predicate(n->get_int() == 4);
 2947   match(ConI);
 2948 
 2949   op_cost(0);
 2950   format %{ %}
 2951   interface(CONST_INTER);
 2952 %}
 2953 
 2954 operand immI_8()  // Matches a ConI whose value is 8, at op_cost 0.
 2955 %{
 2956   predicate(n->get_int() == 8);
 2957   match(ConI);
 2958 
 2959   op_cost(0);
 2960   format %{ %}
 2961   interface(CONST_INTER);
 2962 %}
 2963 
 2964 // Valid scale values for addressing modes: matches a ConI in the range 0..3.
 2965 operand immI2()
 2966 %{
 2967   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 2968   match(ConI);
 2969 
 2970   format %{ %}
 2971   interface(CONST_INTER);
 2972 %}
 2973 
 2974 operand immU7()  // Matches a ConI in the range 0..0x7F, at op_cost 5.
 2975 %{
 2976   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 2977   match(ConI);
 2978 
 2979   op_cost(5);
 2980   format %{ %}
 2981   interface(CONST_INTER);
 2982 %}
 2983 
 2984 operand immI8()  // Matches a ConI in the range -0x80..0x7F, at op_cost 5.
 2985 %{
 2986   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 2987   match(ConI);
 2988 
 2989   op_cost(5);
 2990   format %{ %}
 2991   interface(CONST_INTER);
 2992 %}
 2993 
 2994 operand immU8()  // Matches a ConI in the range 0..255, at op_cost 5.
 2995 %{
 2996   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 2997   match(ConI);
 2998 
 2999   op_cost(5);
 3000   format %{ %}
 3001   interface(CONST_INTER);
 3002 %}
 3003 
 3004 operand immI16()  // Matches a ConI in the range -32768..32767, at op_cost 10.
 3005 %{
 3006   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3007   match(ConI);
 3008 
 3009   op_cost(10);
 3010   format %{ %}
 3011   interface(CONST_INTER);
 3012 %}
 3013 
 3014 // Int Immediate non-negative: matches a ConI whose value is >= 0, at op_cost 0.
 3015 operand immU31()
 3016 %{
 3017   predicate(n->get_int() >= 0);
 3018   match(ConI);
 3019 
 3020   op_cost(0);
 3021   format %{ %}
 3022   interface(CONST_INTER);
 3023 %}
 3024 
 3025 // Constant for long shifts: matches a ConI whose value is 32, at op_cost 0.
 3026 operand immI_32()
 3027 %{
 3028   predicate( n->get_int() == 32 );
 3029   match(ConI);
 3030 
 3031   op_cost(0);
 3032   format %{ %}
 3033   interface(CONST_INTER);
 3034 %}
 3035 
 3036 // Constant for long shifts: matches a ConI whose value is 64, at op_cost 0.
 3037 operand immI_64()
 3038 %{
 3039   predicate( n->get_int() == 64 );
 3040   match(ConI);
 3041 
 3042   op_cost(0);
 3043   format %{ %}
 3044   interface(CONST_INTER);
 3045 %}
 3046 
 3047 // Pointer Immediate: matches any ConP node (no predicate), at op_cost 10.
 3048 operand immP()
 3049 %{
 3050   match(ConP);
 3051 
 3052   op_cost(10);
 3053   format %{ %}
 3054   interface(CONST_INTER);
 3055 %}
 3056 
 3057 // NULL Pointer Immediate: matches a ConP whose get_ptr() is 0, at op_cost 5.
 3058 operand immP0()
 3059 %{
 3060   predicate(n->get_ptr() == 0);
 3061   match(ConP);
 3062 
 3063   op_cost(5);
 3064   format %{ %}
 3065   interface(CONST_INTER);
 3066 %}
 3067 
 3068 // Compressed Pointer Immediate: matches any ConN node (no predicate), at op_cost 10.
 3069 operand immN() %{
 3070   match(ConN);
 3071 
 3072   op_cost(10);
 3073   format %{ %}
 3074   interface(CONST_INTER);
 3075 %}
 3076 
 3077 operand immNKlass() %{  // Matches any ConNKlass node (no predicate), at op_cost 10.
 3078   match(ConNKlass);
 3079 
 3080   op_cost(10);
 3081   format %{ %}
 3082   interface(CONST_INTER);
 3083 %}
 3084 
 3085 // NULL Compressed Pointer Immediate: matches a ConN whose get_narrowcon() is 0, at op_cost 5.
 3086 operand immN0() %{
 3087   predicate(n->get_narrowcon() == 0);
 3088   match(ConN);
 3089 
 3090   op_cost(5);
 3091   format %{ %}
 3092   interface(CONST_INTER);
 3093 %}
 3094 
 3095 operand immP31()  // Matches a ConP with reloc type none whose value shifted right by 31 is 0, at op_cost 5.
 3096 %{
 3097   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 3098             && (n->get_ptr() >> 31) == 0);
 3099   match(ConP);
 3100 
 3101   op_cost(5);
 3102   format %{ %}
 3103   interface(CONST_INTER);
 3104 %}
 3105 
 3106 
 3107 // Long Immediate: matches any ConL node (no predicate), at op_cost 20.
 3108 operand immL()
 3109 %{
 3110   match(ConL);
 3111 
 3112   op_cost(20);
 3113   format %{ %}
 3114   interface(CONST_INTER);
 3115 %}
 3116 
 3117 // Long Immediate 8-bit: matches a ConL in the range -0x80..0x7F, at op_cost 5.
 3118 operand immL8()
 3119 %{
 3120   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 3121   match(ConL);
 3122 
 3123   op_cost(5);
 3124   format %{ %}
 3125   interface(CONST_INTER);
 3126 %}
 3127 
 3128 // Long Immediate 32-bit unsigned: a ConL that is unchanged by a cast to unsigned int.
 3129 operand immUL32()
 3130 %{
 3131   predicate(n->get_long() == (unsigned int) (n->get_long()));
 3132   match(ConL);
 3133 
 3134   op_cost(10);
 3135   format %{ %}
 3136   interface(CONST_INTER);
 3137 %}
 3138 
 3139 // Long Immediate 32-bit signed: a ConL that is unchanged by a cast to int.
 3140 operand immL32()
 3141 %{
 3142   predicate(n->get_long() == (int) (n->get_long()));
 3143   match(ConL);
 3144 
 3145   op_cost(15);
 3146   format %{ %}
 3147   interface(CONST_INTER);
 3148 %}
 3149 
 3150 operand immL_Pow2()  // Matches a ConL that, cast to julong, satisfies is_power_of_2; op_cost 15.
 3151 %{
 3152   predicate(is_power_of_2((julong)n->get_long()));
 3153   match(ConL);
 3154 
 3155   op_cost(15);
 3156   format %{ %}
 3157   interface(CONST_INTER);
 3158 %}
 3159 
 3160 operand immL_NotPow2()  // Matches a ConL whose bitwise complement, cast to julong, satisfies is_power_of_2; op_cost 15.
 3161 %{
 3162   predicate(is_power_of_2((julong)~n->get_long()));
 3163   match(ConL);
 3164 
 3165   op_cost(15);
 3166   format %{ %}
 3167   interface(CONST_INTER);
 3168 %}
 3169 
 3170 // Long Immediate zero: matches a ConL whose value is 0, at op_cost 10.
 3171 operand immL0()
 3172 %{
 3173   predicate(n->get_long() == 0L);
 3174   match(ConL);
 3175 
 3176   op_cost(10);
 3177   format %{ %}
 3178   interface(CONST_INTER);
 3179 %}
 3180 
 3181 // Constant for increment: matches a ConL whose value is 1 (no explicit op_cost).
 3182 operand immL1()
 3183 %{
 3184   predicate(n->get_long() == 1);
 3185   match(ConL);
 3186 
 3187   format %{ %}
 3188   interface(CONST_INTER);
 3189 %}
 3190 
 3191 // Constant for decrement: matches a ConL whose value is -1 (no explicit op_cost).
 3192 operand immL_M1()
 3193 %{
 3194   predicate(n->get_long() == -1);
 3195   match(ConL);
 3196 
 3197   format %{ %}
 3198   interface(CONST_INTER);
 3199 %}
 3200 
 3201 // Long Immediate: matches a ConL whose value is exactly 10 (no explicit op_cost).
 3202 operand immL10()
 3203 %{
 3204   predicate(n->get_long() == 10);
 3205   match(ConL);
 3206 
 3207   format %{ %}
 3208   interface(CONST_INTER);
 3209 %}
 3210 
 3211 // Long immediate from 0 to 127 inclusive (0 <= value < 0x80), at op_cost 10.
 3212 // Used for a shorter form of long mul by 10.
 3213 operand immL_127()
 3214 %{
 3215   predicate(0 <= n->get_long() && n->get_long() < 0x80);
 3216   match(ConL);
 3217 
 3218   op_cost(10);
 3219   format %{ %}
 3220   interface(CONST_INTER);
 3221 %}
 3222 
 3223 // Long Immediate: low 32-bit mask, i.e. a ConL equal to 0xFFFFFFFF, at op_cost 20.
 3224 operand immL_32bits()
 3225 %{
 3226   predicate(n->get_long() == 0xFFFFFFFFL);
 3227   match(ConL);
 3228   op_cost(20);
 3229 
 3230   format %{ %}
 3231   interface(CONST_INTER);
 3232 %}
 3233 
 3234 // Int Immediate: 2^n-1, positive (value > 0 and value + 1 is a power of 2), at op_cost 20.
 3235 operand immI_Pow2M1()
 3236 %{
 3237   predicate((n->get_int() > 0)
 3238             && is_power_of_2((juint)n->get_int() + 1));
 3239   match(ConI);
 3240 
 3241   op_cost(20);
 3242   format %{ %}
 3243   interface(CONST_INTER);
 3244 %}
 3245 
 3246 // Float Immediate zero: matches a ConF for which jint_cast(value) is 0, at op_cost 5.
 3247 operand immF0()
 3248 %{
 3249   predicate(jint_cast(n->getf()) == 0);
 3250   match(ConF);
 3251 
 3252   op_cost(5);
 3253   format %{ %}
 3254   interface(CONST_INTER);
 3255 %}
 3256 
 3257 // Float Immediate: matches any ConF node (no predicate), at op_cost 15.
 3258 operand immF()
 3259 %{
 3260   match(ConF);
 3261 
 3262   op_cost(15);
 3263   format %{ %}
 3264   interface(CONST_INTER);
 3265 %}
 3266 
 3267 // Double Immediate zero: matches a ConD for which jlong_cast(value) is 0, at op_cost 5.
 3268 operand immD0()
 3269 %{
 3270   predicate(jlong_cast(n->getd()) == 0);
 3271   match(ConD);
 3272 
 3273   op_cost(5);
 3274   format %{ %}
 3275   interface(CONST_INTER);
 3276 %}
 3277 
 3278 // Double Immediate: matches any ConD node (no predicate), at op_cost 15.
 3279 operand immD()
 3280 %{
 3281   match(ConD);
 3282 
 3283   op_cost(15);
 3284   format %{ %}
 3285   interface(CONST_INTER);
 3286 %}
 3287 
 3288 // Immediates for special shifts (sign extend)
 3289 
 3290 // Constant 16: matches a ConI whose value is 16 (listed under the special-shift immediates).
 3291 operand immI_16()
 3292 %{
 3293   predicate(n->get_int() == 16);
 3294   match(ConI);
 3295 
 3296   format %{ %}
 3297   interface(CONST_INTER);
 3298 %}
 3299 
 3300 operand immI_24()  // Matches a ConI whose value is 24 (no explicit op_cost).
 3301 %{
 3302   predicate(n->get_int() == 24);
 3303   match(ConI);
 3304 
 3305   format %{ %}
 3306   interface(CONST_INTER);
 3307 %}
 3308 
 3309 // Constant for byte-wide masking: matches a ConI whose value is 255.
 3310 operand immI_255()
 3311 %{
 3312   predicate(n->get_int() == 255);
 3313   match(ConI);
 3314 
 3315   format %{ %}
 3316   interface(CONST_INTER);
 3317 %}
 3318 
 3319 // Constant for short-wide masking: matches a ConI whose value is 65535.
 3320 operand immI_65535()
 3321 %{
 3322   predicate(n->get_int() == 65535);
 3323   match(ConI);
 3324 
 3325   format %{ %}
 3326   interface(CONST_INTER);
 3327 %}
 3328 
 3329 // Constant for byte-wide masking: matches a ConL whose value is 255.
 3330 operand immL_255()
 3331 %{
 3332   predicate(n->get_long() == 255);
 3333   match(ConL);
 3334 
 3335   format %{ %}
 3336   interface(CONST_INTER);
 3337 %}
 3338 
 3339 // Constant for short-wide masking: matches a ConL whose value is 65535.
 3340 operand immL_65535()
 3341 %{
 3342   predicate(n->get_long() == 65535);
 3343   match(ConL);
 3344 
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 operand kReg()  // RegVectMask operand allocated in register class vectmask_reg.
 3350 %{
 3351   constraint(ALLOC_IN_RC(vectmask_reg));
 3352   match(RegVectMask);
 3353   format %{%}
 3354   interface(REG_INTER);
 3355 %}
 3356 
 3357 operand kReg_K1()  // RegVectMask operand restricted to register class vectmask_reg_K1.
 3358 %{
 3359   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3360   match(RegVectMask);
 3361   format %{%}
 3362   interface(REG_INTER);
 3363 %}
 3364 
 3365 operand kReg_K2()  // RegVectMask operand restricted to register class vectmask_reg_K2.
 3366 %{
 3367   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3368   match(RegVectMask);
 3369   format %{%}
 3370   interface(REG_INTER);
 3371 %}
 3372 
 3373 // Special Registers: RegVectMask operand restricted to register class vectmask_reg_K3.
 3374 operand kReg_K3()
 3375 %{
 3376   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3377   match(RegVectMask);
 3378   format %{%}
 3379   interface(REG_INTER);
 3380 %}
 3381 
 3382 operand kReg_K4()  // RegVectMask operand restricted to register class vectmask_reg_K4.
 3383 %{
 3384   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3385   match(RegVectMask);
 3386   format %{%}
 3387   interface(REG_INTER);
 3388 %}
 3389 
 3390 operand kReg_K5()  // RegVectMask operand restricted to register class vectmask_reg_K5.
 3391 %{
 3392   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3393   match(RegVectMask);
 3394   format %{%}
 3395   interface(REG_INTER);
 3396 %}
 3397 
 3398 operand kReg_K6()  // RegVectMask operand restricted to register class vectmask_reg_K6.
 3399 %{
 3400   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3401   match(RegVectMask);
 3402   format %{%}
 3403   interface(REG_INTER);
 3404 %}
 3405 
 3406 // Special Registers: RegVectMask operand restricted to register class vectmask_reg_K7.
 3407 operand kReg_K7()
 3408 %{
 3409   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3410   match(RegVectMask);
 3411   format %{%}
 3412   interface(REG_INTER);
 3413 %}
 3414 
 3415 // Register Operands
 3416 // Integer Register: RegI allocated in class int_reg; the fixed-register operands are listed as further match rules.
 3417 operand rRegI()
 3418 %{
 3419   constraint(ALLOC_IN_RC(int_reg));
 3420   match(RegI);
 3421 
 3422   match(rax_RegI);
 3423   match(rbx_RegI);
 3424   match(rcx_RegI);
 3425   match(rdx_RegI);
 3426   match(rdi_RegI);
 3427 
 3428   format %{ %}
 3429   interface(REG_INTER);
 3430 %}
 3431 
 3432 // Special Registers: RegI restricted to class int_rax_reg, formatted as RAX.
 3433 operand rax_RegI()
 3434 %{
 3435   constraint(ALLOC_IN_RC(int_rax_reg));
 3436   match(RegI);
 3437   match(rRegI);
 3438 
 3439   format %{ "RAX" %}
 3440   interface(REG_INTER);
 3441 %}
 3442 
 3443 // Special Registers: RegI restricted to class int_rbx_reg, formatted as RBX.
 3444 operand rbx_RegI()
 3445 %{
 3446   constraint(ALLOC_IN_RC(int_rbx_reg));
 3447   match(RegI);
 3448   match(rRegI);
 3449 
 3450   format %{ "RBX" %}
 3451   interface(REG_INTER);
 3452 %}
 3453 
 3454 operand rcx_RegI()  // RegI restricted to class int_rcx_reg, formatted as RCX.
 3455 %{
 3456   constraint(ALLOC_IN_RC(int_rcx_reg));
 3457   match(RegI);
 3458   match(rRegI);
 3459 
 3460   format %{ "RCX" %}
 3461   interface(REG_INTER);
 3462 %}
 3463 
 3464 operand rdx_RegI()  // RegI restricted to class int_rdx_reg, formatted as RDX.
 3465 %{
 3466   constraint(ALLOC_IN_RC(int_rdx_reg));
 3467   match(RegI);
 3468   match(rRegI);
 3469 
 3470   format %{ "RDX" %}
 3471   interface(REG_INTER);
 3472 %}
 3473 
 3474 operand rdi_RegI()  // RegI restricted to class int_rdi_reg, formatted as RDI.
 3475 %{
 3476   constraint(ALLOC_IN_RC(int_rdi_reg));
 3477   match(RegI);
 3478   match(rRegI);
 3479 
 3480   format %{ "RDI" %}
 3481   interface(REG_INTER);
 3482 %}
 3483 
 3484 operand no_rax_rdx_RegI()  // RegI in class int_no_rax_rdx_reg; rbx, rcx and rdi operands are listed as match rules.
 3485 %{
 3486   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 3487   match(RegI);
 3488   match(rbx_RegI);
 3489   match(rcx_RegI);
 3490   match(rdi_RegI);
 3491 
 3492   format %{ %}
 3493   interface(REG_INTER);
 3494 %}
 3495 
 3496 operand no_rbp_r13_RegI()  // RegI in class int_no_rbp_r13_reg; rRegI and the fixed-register RegI operands are listed as match rules.
 3497 %{
 3498   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 3499   match(RegI);
 3500   match(rRegI);
 3501   match(rax_RegI);
 3502   match(rbx_RegI);
 3503   match(rcx_RegI);
 3504   match(rdx_RegI);
 3505   match(rdi_RegI);
 3506 
 3507   format %{ %}
 3508   interface(REG_INTER);
 3509 %}
 3510 
 3511 // Pointer Register: RegP in class any_reg; rRegP and the fixed-register pointer operands are listed as match rules.
 3512 operand any_RegP()
 3513 %{
 3514   constraint(ALLOC_IN_RC(any_reg));
 3515   match(RegP);
 3516   match(rax_RegP);
 3517   match(rbx_RegP);
 3518   match(rdi_RegP);
 3519   match(rsi_RegP);
 3520   match(rbp_RegP);
 3521   match(r15_RegP);
 3522   match(rRegP);
 3523 
 3524   format %{ %}
 3525   interface(REG_INTER);
 3526 %}
 3527 
 3528 operand rRegP()  // RegP in class ptr_reg; the fixed-register pointer operands are listed as match rules.
 3529 %{
 3530   constraint(ALLOC_IN_RC(ptr_reg));
 3531   match(RegP);
 3532   match(rax_RegP);
 3533   match(rbx_RegP);
 3534   match(rdi_RegP);
 3535   match(rsi_RegP);
 3536   match(rbp_RegP);  // See Q&A below about
 3537   match(r15_RegP);  // r15_RegP and rbp_RegP.
 3538 
 3539   format %{ %}
 3540   interface(REG_INTER);
 3541 %}
 3542 
 3543 operand rRegN() %{  // RegN operand allocated in class int_reg (the class rRegI also uses).
 3544   constraint(ALLOC_IN_RC(int_reg));
 3545   match(RegN);
 3546 
 3547   format %{ %}
 3548   interface(REG_INTER);
 3549 %}
 3550 
 3551 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 3552 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 3553 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 3554 // The output of an instruction is controlled by the allocator, which respects
 3555 // register class masks, not match rules.  Unless an instruction mentions
 3556 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 3557 // by the allocator as an input.
 3558 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 3559 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 3560 // result, RBP is not included in the output of the instruction either.
 3561 
 3562 operand no_rax_RegP()  // RegP in class ptr_no_rax_reg; rbx, rsi and rdi pointer operands are listed as match rules.
 3563 %{
 3564   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
 3565   match(RegP);
 3566   match(rbx_RegP);
 3567   match(rsi_RegP);
 3568   match(rdi_RegP);
 3569 
 3570   format %{ %}
 3571   interface(REG_INTER);
 3572 %}
 3573 
 3574 // This operand (RegP in class ptr_reg_no_rbp) is not allowed to use RBP
 3575 // even if RBP is not used to hold the frame pointer.
 3576 operand no_rbp_RegP()
 3577 %{
 3578   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 3579   match(RegP);
 3580   match(rbx_RegP);
 3581   match(rsi_RegP);
 3582   match(rdi_RegP);
 3583 
 3584   format %{ %}
 3585   interface(REG_INTER);
 3586 %}
 3587 
 3588 operand no_rax_rbx_RegP()  // RegP in class ptr_no_rax_rbx_reg; rsi and rdi pointer operands are listed as match rules.
 3589 %{
 3590   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
 3591   match(RegP);
 3592   match(rsi_RegP);
 3593   match(rdi_RegP);
 3594 
 3595   format %{ %}
 3596   interface(REG_INTER);
 3597 %}
 3598 
 3599 // Special Registers
 3600 // Return a pointer value: RegP restricted to class ptr_rax_reg.
 3601 operand rax_RegP()
 3602 %{
 3603   constraint(ALLOC_IN_RC(ptr_rax_reg));
 3604   match(RegP);
 3605   match(rRegP);
 3606 
 3607   format %{ %}
 3608   interface(REG_INTER);
 3609 %}
 3610 
 3611 // Special Registers
 3612 // Return a compressed pointer value: RegN restricted to class int_rax_reg.
 3613 operand rax_RegN()
 3614 %{
 3615   constraint(ALLOC_IN_RC(int_rax_reg));
 3616   match(RegN);
 3617   match(rRegN);
 3618 
 3619   format %{ %}
 3620   interface(REG_INTER);
 3621 %}
 3622 
 3623 // Used in AtomicAdd: RegP restricted to class ptr_rbx_reg.
 3624 operand rbx_RegP()
 3625 %{
 3626   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 3627   match(RegP);
 3628   match(rRegP);
 3629 
 3630   format %{ %}
 3631   interface(REG_INTER);
 3632 %}
 3633 
 3634 operand rsi_RegP()  // RegP restricted to class ptr_rsi_reg.
 3635 %{
 3636   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 3637   match(RegP);
 3638   match(rRegP);
 3639 
 3640   format %{ %}
 3641   interface(REG_INTER);
 3642 %}
 3643 
 3644 operand rbp_RegP()  // RegP restricted to class ptr_rbp_reg.
 3645 %{
 3646   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 3647   match(RegP);
 3648   match(rRegP);
 3649 
 3650   format %{ %}
 3651   interface(REG_INTER);
 3652 %}
 3653 
 3654 // Used in rep stosq: RegP restricted to class ptr_rdi_reg.
 3655 operand rdi_RegP()
 3656 %{
 3657   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 3658   match(RegP);
 3659   match(rRegP);
 3660 
 3661   format %{ %}
 3662   interface(REG_INTER);
 3663 %}
 3664 
 3665 operand r15_RegP()  // RegP restricted to class ptr_r15_reg (the Q&A comment above calls r15 the read-only TLS register).
 3666 %{
 3667   constraint(ALLOC_IN_RC(ptr_r15_reg));
 3668   match(RegP);
 3669   match(rRegP);
 3670 
 3671   format %{ %}
 3672   interface(REG_INTER);
 3673 %}
 3674 
 3675 operand rRegL()  // RegL allocated in class long_reg; rax_RegL and rdx_RegL are listed as match rules.
 3676 %{
 3677   constraint(ALLOC_IN_RC(long_reg));
 3678   match(RegL);
 3679   match(rax_RegL);
 3680   match(rdx_RegL);
 3681 
 3682   format %{ %}
 3683   interface(REG_INTER);
 3684 %}
 3685 
 3686 // Long register operand allocated from class long_no_rax_rdx_reg
 3687 operand no_rax_rdx_RegL()
 3688 %{
 3689   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 3690   match(RegL);   // ideal long register
 3691   match(rRegL);  // a general long register operand is accepted as well
 3692
 3693   format %{ %}
 3694   interface(REG_INTER);
 3695 %}
 3696 
 3697 operand rax_RegL()  // Long operand constrained to class long_rax_reg
 3698 %{
 3699   constraint(ALLOC_IN_RC(long_rax_reg));
 3700   match(RegL);   // ideal long register
 3701   match(rRegL);  // a general long register operand is accepted as well
 3702
 3703   format %{ "RAX" %}
 3704   interface(REG_INTER);  // unlike the sibling long operands, this one has a non-empty format string
 3705 %}
 3706 
 3707 operand rcx_RegL()  // Long operand constrained to class long_rcx_reg
 3708 %{
 3709   constraint(ALLOC_IN_RC(long_rcx_reg));
 3710   match(RegL);   // ideal long register
 3711   match(rRegL);  // a general long register operand is accepted as well
 3712
 3713   format %{ %}
 3714   interface(REG_INTER);
 3715 %}
 3716 
 3717 operand rdx_RegL()  // Long operand constrained to class long_rdx_reg
 3718 %{
 3719   constraint(ALLOC_IN_RC(long_rdx_reg));
 3720   match(RegL);   // ideal long register
 3721   match(rRegL);  // a general long register operand is accepted as well
 3722
 3723   format %{ %}
 3724   interface(REG_INTER);
 3725 %}
 3726 
 3727 operand no_rbp_r13_RegL()  // Long register operand allocated from class long_no_rbp_r13_reg
 3728 %{
 3729   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 3730   match(RegL);      // ideal long register
 3731   match(rRegL);     // general and fixed-register long operands below are accepted as well
 3732   match(rax_RegL);
 3733   match(rcx_RegL);
 3734   match(rdx_RegL);
 3735
 3736   format %{ %}
 3737   interface(REG_INTER);
 3738 %}
 3739 
 3740 // Flags register operand, used as output of compare instructions
 3741 operand rFlagsReg()
 3742 %{
 3743   constraint(ALLOC_IN_RC(int_flags));  // register class int_flags
 3744   match(RegFlags);                     // ideal flags register
 3745
 3746   format %{ "RFLAGS" %}
 3747   interface(REG_INTER);
 3748 %}
 3749 
 3750 // Flags register operand, used as output of FLOATING POINT compare instructions
 3751 operand rFlagsRegU()
 3752 %{
 3753   constraint(ALLOC_IN_RC(int_flags));  // same register class as rFlagsReg; only the operand type differs
 3754   match(RegFlags);
 3755
 3756   format %{ "RFLAGS_U" %}
 3757   interface(REG_INTER);
 3758 %}
 3759 
 3760 operand rFlagsRegUCF() %{  // Flags register operand variant printed as "RFLAGS_U_CF"
 3761   constraint(ALLOC_IN_RC(int_flags));
 3762   match(RegFlags);
 3763   predicate(false);  // predicate is constant false; NOTE(review): presumably only usable when named explicitly by an instruct - confirm
 3764
 3765   format %{ "RFLAGS_U_CF" %}
 3766   interface(REG_INTER);
 3767 %}
 3768 
 3769 // Float register operand, allocated from class float_reg
 3770 operand regF() %{
 3771    constraint(ALLOC_IN_RC(float_reg));
 3772    match(RegF);  // ideal float register
 3773
 3774    format %{ %}
 3775    interface(REG_INTER);
 3776 %}
 3777 
 3778 // Float register operand, allocated from class float_reg_legacy
 3779 operand legRegF() %{
 3780    constraint(ALLOC_IN_RC(float_reg_legacy));
 3781    match(RegF);  // ideal float register
 3782
 3783    format %{ %}
 3784    interface(REG_INTER);
 3785 %}
 3786 
 3787 // Float register operand, allocated from class float_reg_vl
 3788 operand vlRegF() %{
 3789    constraint(ALLOC_IN_RC(float_reg_vl));
 3790    match(RegF);  // ideal float register
 3791
 3792    format %{ %}
 3793    interface(REG_INTER);
 3794 %}
 3795 
 3796 // Double register operand, allocated from class double_reg
 3797 operand regD() %{
 3798    constraint(ALLOC_IN_RC(double_reg));
 3799    match(RegD);  // ideal double register
 3800
 3801    format %{ %}
 3802    interface(REG_INTER);
 3803 %}
 3804 
 3805 // Double register operand, allocated from class double_reg_legacy
 3806 operand legRegD() %{
 3807    constraint(ALLOC_IN_RC(double_reg_legacy));
 3808    match(RegD);  // ideal double register
 3809
 3810    format %{ %}
 3811    interface(REG_INTER);
 3812 %}
 3813 
 3814 // Double register operand, allocated from class double_reg_vl
 3815 operand vlRegD() %{
 3816    constraint(ALLOC_IN_RC(double_reg_vl));
 3817    match(RegD);  // ideal double register
 3818
 3819    format %{ %}
 3820    interface(REG_INTER);
 3821 %}
 3822 
 3823 //----------Memory Operands----------------------------------------------------
 3824 // Direct Memory Operand
 3825 // operand direct(immP addr)
 3826 // %{
 3827 //   match(addr);
 3828 
 3829 //   format %{ "[$addr]" %}
 3830 //   interface(MEMORY_INTER) %{
 3831 //     base(0xFFFFFFFF);
 3832 //     index(0x4);
 3833 //     scale(0x0);
 3834 //     disp($addr);
 3835 //   %}
 3836 // %}
 3837 
 3838 // Indirect Memory Operand: the address is the pointer register alone, [reg]
 3839 operand indirect(any_RegP reg)
 3840 %{
 3841   constraint(ALLOC_IN_RC(ptr_reg));
 3842   match(reg);
 3843
 3844   format %{ "[$reg]" %}
 3845   interface(MEMORY_INTER) %{
 3846     base($reg);
 3847     index(0x4);  // 0x4 = no index register
 3848     scale(0x0);  // no scale
 3849     disp(0x0);   // no displacement
 3850   %}
 3851 %}
 3852 
 3853 // Indirect Memory Plus Short Offset Operand: [reg + off], off typed immL8
 3854 operand indOffset8(any_RegP reg, immL8 off)
 3855 %{
 3856   constraint(ALLOC_IN_RC(ptr_reg));
 3857   match(AddP reg off);
 3858
 3859   format %{ "[$reg + $off (8-bit)]" %}
 3860   interface(MEMORY_INTER) %{
 3861     base($reg);
 3862     index(0x4);  // 0x4 = no index register
 3863     scale(0x0);  // no scale
 3864     disp($off);  // constant offset becomes the displacement
 3865   %}
 3866 %}
 3867 
 3868 // Indirect Memory Plus Long Offset Operand: [reg + off], off typed immL32
 3869 operand indOffset32(any_RegP reg, immL32 off)
 3870 %{
 3871   constraint(ALLOC_IN_RC(ptr_reg));
 3872   match(AddP reg off);
 3873
 3874   format %{ "[$reg + $off (32-bit)]" %}
 3875   interface(MEMORY_INTER) %{
 3876     base($reg);
 3877     index(0x4);  // 0x4 = no index register
 3878     scale(0x0);  // no scale
 3879     disp($off);  // constant offset becomes the displacement
 3880   %}
 3881 %}
 3882 
 3883 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off]
 3884 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 3885 %{
 3886   constraint(ALLOC_IN_RC(ptr_reg));
 3887   match(AddP (AddP reg lreg) off);  // inner AddP adds the index, outer AddP adds the constant
 3888
 3889   op_cost(10);
 3890   format %{"[$reg + $off + $lreg]" %}
 3891   interface(MEMORY_INTER) %{
 3892     base($reg);
 3893     index($lreg);
 3894     scale(0x0);  // index is not scaled
 3895     disp($off);
 3896   %}
 3897 %}
 3898 
 3899 // Indirect Memory Plus Index Register Operand: [reg + lreg], no offset
 3900 operand indIndex(any_RegP reg, rRegL lreg)
 3901 %{
 3902   constraint(ALLOC_IN_RC(ptr_reg));
 3903   match(AddP reg lreg);
 3904
 3905   op_cost(10);
 3906   format %{"[$reg + $lreg]" %}
 3907   interface(MEMORY_INTER) %{
 3908     base($reg);
 3909     index($lreg);
 3910     scale(0x0);  // index is not scaled
 3911     disp(0x0);   // no displacement
 3912   %}
 3913 %}
 3914 
 3915 // Indirect Memory Plus Scaled Index Register Operand: [reg + lreg << scale]
 3916 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 3917 %{
 3918   constraint(ALLOC_IN_RC(ptr_reg));
 3919   match(AddP reg (LShiftL lreg scale));  // the left shift by a constant is folded into the address
 3920
 3921   op_cost(10);
 3922   format %{"[$reg + $lreg << $scale]" %}
 3923   interface(MEMORY_INTER) %{
 3924     base($reg);
 3925     index($lreg);
 3926     scale($scale);  // shift amount, typed immI2
 3927     disp(0x0);      // no displacement
 3928   %}
 3929 %}
 3930 
 3931 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)  // [reg + idx << scale] with a non-negative int index
 3932 %{
 3933   constraint(ALLOC_IN_RC(ptr_reg));
 3934   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // n->in(3) is the LShiftL, its in(1) the ConvI2L; require type lower bound >= 0
 3935   match(AddP reg (LShiftL (ConvI2L idx) scale));
 3936
 3937   op_cost(10);
 3938   format %{"[$reg + pos $idx << $scale]" %}
 3939   interface(MEMORY_INTER) %{
 3940     base($reg);
 3941     index($idx);    // the int register is used as the index; the ConvI2L is absorbed by the match
 3942     scale($scale);
 3943     disp(0x0);      // no displacement
 3944   %}
 3945 %}
 3946 
 3947 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: [reg + off + lreg << scale]
 3948 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 3949 %{
 3950   constraint(ALLOC_IN_RC(ptr_reg));
 3951   match(AddP (AddP reg (LShiftL lreg scale)) off);  // inner AddP adds the scaled index, outer AddP the constant
 3952
 3953   op_cost(10);
 3954   format %{"[$reg + $off + $lreg << $scale]" %}
 3955   interface(MEMORY_INTER) %{
 3956     base($reg);
 3957     index($lreg);
 3958     scale($scale);
 3959     disp($off);
 3960   %}
 3961 %}
 3962 
 3963 // Indirect Memory Plus Positive Index Register Plus Offset Operand: [reg + off + idx]
 3964 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 3965 %{
 3966   constraint(ALLOC_IN_RC(ptr_reg));
 3967   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);  // n->in(2) is the inner AddP, its in(3) the ConvI2L; require type lower bound >= 0
 3968   match(AddP (AddP reg (ConvI2L idx)) off);
 3969
 3970   op_cost(10);
 3971   format %{"[$reg + $off + $idx]" %}
 3972   interface(MEMORY_INTER) %{
 3973     base($reg);
 3974     index($idx);  // the int register is used as the index; the ConvI2L is absorbed by the match
 3975     scale(0x0);   // index is not scaled
 3976     disp($off);
 3977   %}
 3978 %}
 3979 
 3980 // Indirect Memory Plus Scaled Positive Index Register Plus Offset Operand: [reg + off + idx << scale]
 3981 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3982 %{
 3983   constraint(ALLOC_IN_RC(ptr_reg));
 3984   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // inner AddP -> LShiftL -> ConvI2L; require type lower bound >= 0
 3985   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3986
 3987   op_cost(10);
 3988   format %{"[$reg + $off + $idx << $scale]" %}
 3989   interface(MEMORY_INTER) %{
 3990     base($reg);
 3991     index($idx);  // the int register is used as the index; the ConvI2L is absorbed by the match
 3992     scale($scale);
 3993     disp($off);
 3994   %}
 3995 %}
 3996 
 3997 // Indirect Narrow Oop Plus Offset Operand: [R12 + reg << 3 + off]
 3998 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 3999 // so we can't free r12 even with CompressedOops::base() == NULL.
 4000 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 4001   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));  // only when the oop shift equals the times_8 scale
 4002   constraint(ALLOC_IN_RC(ptr_reg));
 4003   match(AddP (DecodeN reg) off);  // the DecodeN is folded into the addressing mode
 4004
 4005   op_cost(10);
 4006   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4007   interface(MEMORY_INTER) %{
 4008     base(0xc); // R12
 4009     index($reg);  // the narrow oop register is the index
 4010     scale(0x3);   // shift by 3, as printed in the format string
 4011     disp($off);
 4012   %}
 4013 %}
 4014 
 4015 // Indirect Memory Operand through a narrow oop register: [reg]
 4016 operand indirectNarrow(rRegN reg)
 4017 %{
 4018   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 4019   constraint(ALLOC_IN_RC(ptr_reg));
 4020   match(DecodeN reg);  // the DecodeN is absorbed; the narrow register is used directly as base
 4021
 4022   format %{ "[$reg]" %}
 4023   interface(MEMORY_INTER) %{
 4024     base($reg);
 4025     index(0x4);  // 0x4 = no index register
 4026     scale(0x0);  // no scale
 4027     disp(0x0);   // no displacement
 4028   %}
 4029 %}
 4030 
 4031 // Indirect Memory Plus Short Offset Operand through a narrow oop register: [reg + off], off typed immL8
 4032 operand indOffset8Narrow(rRegN reg, immL8 off)
 4033 %{
 4034   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 4035   constraint(ALLOC_IN_RC(ptr_reg));
 4036   match(AddP (DecodeN reg) off);
 4037
 4038   format %{ "[$reg + $off (8-bit)]" %}
 4039   interface(MEMORY_INTER) %{
 4040     base($reg);
 4041     index(0x4);  // 0x4 = no index register
 4042     scale(0x0);  // no scale
 4043     disp($off);
 4044   %}
 4045 %}
 4046 
 4047 // Indirect Memory Plus Long Offset Operand through a narrow oop register: [reg + off], off typed immL32
 4048 operand indOffset32Narrow(rRegN reg, immL32 off)
 4049 %{
 4050   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 4051   constraint(ALLOC_IN_RC(ptr_reg));
 4052   match(AddP (DecodeN reg) off);
 4053
 4054   format %{ "[$reg + $off (32-bit)]" %}
 4055   interface(MEMORY_INTER) %{
 4056     base($reg);
 4057     index(0x4);  // 0x4 = no index register
 4058     scale(0x0);  // no scale
 4059     disp($off);
 4060   %}
 4061 %}
 4062 
 4063 // Indirect Memory Plus Index Register Plus Offset Operand through a narrow oop register: [reg + lreg + off]
 4064 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 4065 %{
 4066   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 4067   constraint(ALLOC_IN_RC(ptr_reg));
 4068   match(AddP (AddP (DecodeN reg) lreg) off);
 4069
 4070   op_cost(10);
 4071   format %{"[$reg + $off + $lreg]" %}
 4072   interface(MEMORY_INTER) %{
 4073     base($reg);
 4074     index($lreg);
 4075     scale(0x0);  // index is not scaled
 4076     disp($off);
 4077   %}
 4078 %}
 4079 
 4080 // Indirect Memory Plus Index Register Operand through a narrow oop register: [reg + lreg], no offset
 4081 operand indIndexNarrow(rRegN reg, rRegL lreg)
 4082 %{
 4083   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 4084   constraint(ALLOC_IN_RC(ptr_reg));
 4085   match(AddP (DecodeN reg) lreg);
 4086
 4087   op_cost(10);
 4088   format %{"[$reg + $lreg]" %}
 4089   interface(MEMORY_INTER) %{
 4090     base($reg);
 4091     index($lreg);
 4092     scale(0x0);  // index is not scaled
 4093     disp(0x0);   // no displacement
 4094   %}
 4095 %}
 4096 
 4097 // Indirect Memory Plus Scaled Index Register Operand through a narrow oop register: [reg + lreg << scale]
 4098 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 4099 %{
 4100   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 4101   constraint(ALLOC_IN_RC(ptr_reg));
 4102   match(AddP (DecodeN reg) (LShiftL lreg scale));
 4103
 4104   op_cost(10);
 4105   format %{"[$reg + $lreg << $scale]" %}
 4106   interface(MEMORY_INTER) %{
 4107     base($reg);
 4108     index($lreg);
 4109     scale($scale);
 4110     disp(0x0);  // no displacement
 4111   %}
 4112 %}
 4113 
 4114 // Indirect Memory Plus Scaled Index Register Plus Offset Operand through a narrow oop register: [reg + off + lreg << scale]
 4115 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 4116 %{
 4117   predicate(CompressedOops::shift() == 0);  // only when the compressed-oop shift is zero
 4118   constraint(ALLOC_IN_RC(ptr_reg));
 4119   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 4120
 4121   op_cost(10);
 4122   format %{"[$reg + $off + $lreg << $scale]" %}
 4123   interface(MEMORY_INTER) %{
 4124     base($reg);
 4125     index($lreg);
 4126     scale($scale);
 4127     disp($off);
 4128   %}
 4129 %}
 4130 
 4131 // Indirect Memory Plus Positive Index Register Plus Offset Operand through a narrow oop register: [reg + off + idx]
 4132 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 4133 %{
 4134   constraint(ALLOC_IN_RC(ptr_reg));
 4135   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);  // zero oop shift, and the ConvI2L type lower bound is >= 0
 4136   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 4137
 4138   op_cost(10);
 4139   format %{"[$reg + $off + $idx]" %}
 4140   interface(MEMORY_INTER) %{
 4141     base($reg);
 4142     index($idx);  // the int register is used as the index; the ConvI2L is absorbed by the match
 4143     scale(0x0);   // index is not scaled
 4144     disp($off);
 4145   %}
 4146 %}
 4147 
 4148 // Indirect Memory Plus Scaled Positive Index Register Plus Offset Operand through a narrow oop register: [reg + off + idx << scale]
 4149 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 4150 %{
 4151   constraint(ALLOC_IN_RC(ptr_reg));
 4152   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // zero oop shift, and the ConvI2L type lower bound is >= 0
 4153   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 4154
 4155   op_cost(10);
 4156   format %{"[$reg + $off + $idx << $scale]" %}
 4157   interface(MEMORY_INTER) %{
 4158     base($reg);
 4159     index($idx);  // the int register is used as the index; the ConvI2L is absorbed by the match
 4160     scale($scale);
 4161     disp($off);
 4162   %}
 4163 %}
 4164 
 4165 //----------Special Memory Operands--------------------------------------------
 4166 // Stack Slot Operand - This operand is used for loading and storing temporary
 4167 //                      values on the stack where a match requires a value to
 4168 //                      flow through memory.
 4169 operand stackSlotP(sRegP reg)  // Stack slot holding a pointer value, addressed as [RSP + slot offset]
 4170 %{
 4171   constraint(ALLOC_IN_RC(stack_slots));
 4172   // No match rule because this operand is only generated during matching
 4173
 4174   format %{ "[$reg]" %}
 4175   interface(MEMORY_INTER) %{
 4176     base(0x4);   // RSP
 4177     index(0x4);  // No Index
 4178     scale(0x0);  // No Scale
 4179     disp($reg);  // Stack Offset
 4180   %}
 4181 %}
 4182 
 4183 operand stackSlotI(sRegI reg)  // Stack slot holding an int value, addressed as [RSP + slot offset]
 4184 %{
 4185   constraint(ALLOC_IN_RC(stack_slots));
 4186   // No match rule because this operand is only generated during matching
 4187
 4188   format %{ "[$reg]" %}
 4189   interface(MEMORY_INTER) %{
 4190     base(0x4);   // RSP
 4191     index(0x4);  // No Index
 4192     scale(0x0);  // No Scale
 4193     disp($reg);  // Stack Offset
 4194   %}
 4195 %}
 4196 
 4197 operand stackSlotF(sRegF reg)  // Stack slot holding a float value, addressed as [RSP + slot offset]
 4198 %{
 4199   constraint(ALLOC_IN_RC(stack_slots));
 4200   // No match rule because this operand is only generated during matching
 4201
 4202   format %{ "[$reg]" %}
 4203   interface(MEMORY_INTER) %{
 4204     base(0x4);   // RSP
 4205     index(0x4);  // No Index
 4206     scale(0x0);  // No Scale
 4207     disp($reg);  // Stack Offset
 4208   %}
 4209 %}
 4210 
 4211 operand stackSlotD(sRegD reg)  // Stack slot holding a double value, addressed as [RSP + slot offset]
 4212 %{
 4213   constraint(ALLOC_IN_RC(stack_slots));
 4214   // No match rule because this operand is only generated during matching
 4215
 4216   format %{ "[$reg]" %}
 4217   interface(MEMORY_INTER) %{
 4218     base(0x4);   // RSP
 4219     index(0x4);  // No Index
 4220     scale(0x0);  // No Scale
 4221     disp($reg);  // Stack Offset
 4222   %}
 4223 %}
 4224 operand stackSlotL(sRegL reg)  // Stack slot holding a long value, addressed as [RSP + slot offset]
 4225 %{
 4226   constraint(ALLOC_IN_RC(stack_slots));
 4227   // No match rule because this operand is only generated during matching
 4228
 4229   format %{ "[$reg]" %}
 4230   interface(MEMORY_INTER) %{
 4231     base(0x4);   // RSP
 4232     index(0x4);  // No Index
 4233     scale(0x0);  // No Scale
 4234     disp($reg);  // Stack Offset
 4235   %}
 4236 %}
 4237 
 4238 //----------Conditional Branch Operands----------------------------------------
 4239 // Comparison Op  - This is the operation of the comparison, and is limited to
 4240 //                  the following set of codes:
 4241 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4242 //
 4243 // Other attributes of the comparison, such as unsignedness, are specified
 4244 // by the comparison instruction that sets a condition code flags register.
 4245 // That result is represented by a flags operand whose subtype is appropriate
 4246 // to the unsignedness (etc.) of the comparison.
 4247 //
 4248 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4249 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4250 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4251 
 4252 // Comparison Code, signed compare: maps each Bool test to a condition-code value and mnemonic suffix
 4253 operand cmpOp()
 4254 %{
 4255   match(Bool);
 4256
 4257   format %{ "" %}
 4258   interface(COND_INTER) %{
 4259     equal(0x4, "e");
 4260     not_equal(0x5, "ne");
 4261     less(0xC, "l");            // signed conditions: l / ge / le / g
 4262     greater_equal(0xD, "ge");
 4263     less_equal(0xE, "le");
 4264     greater(0xF, "g");
 4265     overflow(0x0, "o");
 4266     no_overflow(0x1, "no");
 4267   %}
 4268 %}
 4269 
 4270 // Comparison Code, unsigned compare.  Also used by FP, with
 4271 // C2 (unordered) turned into GT or LT already.  The other bits
 4272 // C0 and C3 are turned into the Carry & Zero flags.
 4273 operand cmpOpU()
 4274 %{
 4275   match(Bool);
 4276
 4277   format %{ "" %}
 4278   interface(COND_INTER) %{
 4279     equal(0x4, "e");
 4280     not_equal(0x5, "ne");
 4281     less(0x2, "b");            // unsigned conditions: b / ae / be / a
 4282     greater_equal(0x3, "ae");
 4283     less_equal(0x6, "be");
 4284     greater(0x7, "a");
 4285     overflow(0x0, "o");
 4286     no_overflow(0x1, "no");
 4287   %}
 4288 %}
 4289 
 4290 
 4291 // Floating comparisons that don't require any fixup for the unordered case.
 4292 // If both inputs of the comparison are the same, ZF is always set so we
 4293 // don't need to use cmpOpUCF2 for eq/ne; eq/ne are then encoded via the parity condition (np/p).
 4294 operand cmpOpUCF() %{
 4295   match(Bool);
 4296   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4297             n->as_Bool()->_test._test == BoolTest::ge ||
 4298             n->as_Bool()->_test._test == BoolTest::le ||
 4299             n->as_Bool()->_test._test == BoolTest::gt ||
 4300             n->in(1)->in(1) == n->in(1)->in(2));
 4301   format %{ "" %}
 4302   interface(COND_INTER) %{
 4303     equal(0xb, "np");      // reached for eq only when both compare inputs are the same node (see predicate)
 4304     not_equal(0xa, "p");   // likewise for ne
 4305     less(0x2, "b");
 4306     greater_equal(0x3, "ae");
 4307     less_equal(0x6, "be");
 4308     greater(0x7, "a");
 4309     overflow(0x0, "o");
 4310     no_overflow(0x1, "no");
 4311   %}
 4312 %}
 4313 
 4314 
 4315 // Floating eq/ne comparisons on two distinct inputs; these can be fixed up with extra conditional jumps
 4316 operand cmpOpUCF2() %{
 4317   match(Bool);
 4318   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 4319              n->as_Bool()->_test._test == BoolTest::eq) &&
 4320             n->in(1)->in(1) != n->in(1)->in(2));
 4321   format %{ "" %}
 4322   interface(COND_INTER) %{
 4323     equal(0x4, "e");           // same condition table as cmpOpU
 4324     not_equal(0x5, "ne");
 4325     less(0x2, "b");
 4326     greater_equal(0x3, "ae");
 4327     less_equal(0x6, "be");
 4328     greater(0x7, "a");
 4329     overflow(0x0, "o");
 4330     no_overflow(0x1, "no");
 4331   %}
 4332 %}
 4333 
 4334 //----------OPERAND CLASSES----------------------------------------------------
 4335 // Operand Classes are groups of operands that are used to simplify
 4336 // instruction definitions by not requiring the AD writer to specify separate
 4337 // instructions for every form of operand when the instruction accepts
 4338 // multiple operand types with the same basic encoding and format.  The classic
 4339 // case of this is memory operands.
 4340
 4341 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4342                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4343                indCompressedOopOffset,
 4344                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 4345                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4346                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);  // every MEMORY_INTER addressing operand above except the stackSlot* operands
 4347 
 4348 //----------PIPELINE-----------------------------------------------------------
 4349 // Rules which define the behavior of the target architectures pipeline.
 4350 pipeline %{
 4351 
 4352 //----------ATTRIBUTES---------------------------------------------------------
 4353 attributes %{
 4354   variable_size_instructions;        // Instructions are variable size
 4355   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4356   instruction_unit_size = 1;         // Instruction size is measured in 1-byte units
 4357   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4358   instruction_fetch_units = 1;       // of 16 bytes
 4359
 4360   // List of nop instructions
 4361   nops( MachNop );
 4362 %}
 4363 
 4364 //----------RESOURCES----------------------------------------------------------
 4365 // Resources are the functional units available to the machine
 4366
 4367 // Generic P2/P3 pipeline
 4368 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4369 // 3 instructions decoded per cycle.
 4370 // 3 memory units are declared below (MS0-MS2; NOTE(review): this comment previously said 2 load/store ops per cycle), 1 branch, 1 FPU,
 4371 // 3 ALU op, only ALU0 handles mul instructions.
 4372 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4373            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 4374            BR, FPU,
 4375            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);  // DECODE / MEM / ALU are masks meaning "any one of" the listed units
 4376 
 4377 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4378 // Pipeline Description specifies the stages in the machine's pipeline
 4379
 4380 // Generic P2/P3 pipeline: six stages, referenced as S0..S5 by the pipe classes
 4381 pipe_desc(S0, S1, S2, S3, S4, S5);
 4382 
 4383 //----------PIPELINE CLASSES---------------------------------------------------
 4384 // Pipeline Classes describe the stages in which input and output are
 4385 // referenced by the hardware pipeline.
 4386 
 4387 // Naming convention: ialu or fpu
 4388 // Then: _reg
 4389 // Then: _reg if there is a 2nd register
 4390 // Then: _long if it's a pair of instructions implementing a long
 4391 // Then: _fat if it requires the big decoder
 4392 //   Or: _mem if it requires the big decoder and a memory unit.
 4393 
 4394 // Integer ALU reg operation: one register that is both read and written
 4395 pipe_class ialu_reg(rRegI dst)
 4396 %{
 4397     single_instruction;
 4398     dst    : S4(write);  // result written in stage S4
 4399     dst    : S3(read);   // same register read in stage S3
 4400     DECODE : S0;        // any decoder
 4401     ALU    : S3;        // any alu
 4402 %}
 4403 
 4404 // Long ALU reg operation, modelled as two instructions
 4405 pipe_class ialu_reg_long(rRegL dst)
 4406 %{
 4407     instruction_count(2);
 4408     dst    : S4(write);
 4409     dst    : S3(read);
 4410     DECODE : S0(2);     // any decoder, count 2
 4411     ALU    : S3(2);     // any alu, count 2 (three ALUs are declared in resources)
 4412 %}
 4413 
 4414 // Integer ALU reg operation using big decoder (D0 instead of any DECODE unit)
 4415 pipe_class ialu_reg_fat(rRegI dst)
 4416 %{
 4417     single_instruction;
 4418     dst    : S4(write);
 4419     dst    : S3(read);
 4420     D0     : S0;        // big decoder only
 4421     ALU    : S3;        // any alu
 4422 %}
 4423 
 4424 // Integer ALU reg-reg operation: src read in S3, dst written in S4
 4425 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 4426 %{
 4427     single_instruction;
 4428     dst    : S4(write);
 4429     src    : S3(read);
 4430     DECODE : S0;        // any decoder
 4431     ALU    : S3;        // any alu
 4432 %}
 4433 
 4434 // Integer ALU operation using big decoder; despite the reg_reg name, src is declared as a memory operand and no MEM unit is reserved
 4435 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 4436 %{
 4437     single_instruction;
 4438     dst    : S4(write);
 4439     src    : S3(read);
 4440     D0     : S0;        // big decoder only
 4441     ALU    : S3;        // any alu
 4442 %}
 4443 
 4444 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, result written in S5
 4445 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 4446 %{
 4447     single_instruction;
 4448     dst    : S5(write);
 4449     mem    : S3(read);
 4450     D0     : S0;        // big decoder only
 4451     ALU    : S4;        // any alu
 4452     MEM    : S3;        // any mem
 4453 %}
 4454 
 4455 // Integer mem operation (prefetch): memory operand only, no register result and no ALU
 4456 pipe_class ialu_mem(memory mem)
 4457 %{
 4458     single_instruction;
 4459     mem    : S3(read);
 4460     D0     : S0;        // big decoder only
 4461     MEM    : S3;        // any mem
 4462 %}
 4463 
 4464 // Integer Store to Memory from a register
 4465 pipe_class ialu_mem_reg(memory mem, rRegI src)
 4466 %{
 4467     single_instruction;
 4468     mem    : S3(read);   // address operand read in S3
 4469     src    : S5(read);   // stored value read in S5
 4470     D0     : S0;        // big decoder only
 4471     ALU    : S4;        // any alu
 4472     MEM    : S3;        // any mem
 4473 %}
 4474 
 4475 // // Long Store to Memory
 4476 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 4477 // %{
 4478 //     instruction_count(2);
 4479 //     mem    : S3(read);
 4480 //     src    : S5(read);
 4481 //     D0     : S0(2);          // big decoder only; twice
 4482 //     ALU    : S4(2);     // any 2 alus
 4483 //     MEM    : S3(2);  // Both mems
 4484 // %}
 4485 
 4486 // Integer Store to Memory with no register source (immediate form)
 4487 pipe_class ialu_mem_imm(memory mem)
 4488 %{
 4489     single_instruction;
 4490     mem    : S3(read);
 4491     D0     : S0;        // big decoder only
 4492     ALU    : S4;        // any alu
 4493     MEM    : S3;        // any mem
 4494 %}
 4495 
 4496 // Integer ALU0 reg-reg operation: restricted to ALU0 and the big decoder
 4497 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 4498 %{
 4499     single_instruction;
 4500     dst    : S4(write);
 4501     src    : S3(read);
 4502     D0     : S0;        // Big decoder only
 4503     ALU0   : S3;        // only alu0
 4504 %}
 4505 
 4506 // Integer ALU0 reg-mem operation: memory read in S3, ALU0 in S4, result written in S5
 4507 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 4508 %{
 4509     single_instruction;
 4510     dst    : S5(write);
 4511     mem    : S3(read);
 4512     D0     : S0;        // big decoder only
 4513     ALU0   : S4;        // ALU0 only
 4514     MEM    : S3;        // any mem
 4515 %}
 4516 
 4517 // Integer ALU reg-reg operation that writes the flags register (compare-style)
 4518 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 4519 %{
 4520     single_instruction;
 4521     cr     : S4(write);  // flags are the only result
 4522     src1   : S3(read);
 4523     src2   : S3(read);
 4524     DECODE : S0;        // any decoder
 4525     ALU    : S3;        // any alu
 4526 %}
 4527 
 4528 // Integer ALU reg-imm operation that writes the flags register
 4529 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 4530 %{
 4531     single_instruction;
 4532     cr     : S4(write);  // flags are the only result
 4533     src1   : S3(read);
 4534     DECODE : S0;        // any decoder
 4535     ALU    : S3;        // any alu
 4536 %}
 4537 
 4538 // Integer ALU reg-mem operation that writes the flags register
 4539 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 4540 %{
 4541     single_instruction;
 4542     cr     : S4(write);  // flags are the only result
 4543     src1   : S3(read);
 4544     src2   : S3(read);
 4545     D0     : S0;        // big decoder only
 4546     ALU    : S4;        // any alu
 4547     MEM    : S3;        // any mem
 4548 %}
 4549 
 4550 // Four-instruction sequence over registers p, q and y (NOTE(review): presumably the compare-less-than mask idiom - confirm against the instructs using it)
 4551 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 4552 %{
 4553     instruction_count(4);
 4554     y      : S4(read);
 4555     q      : S3(read);
 4556     p      : S3(read);
 4557     DECODE : S0(4);     // any decoder, count 4; no ALU resource is reserved
 4558 %}
 4559 
 4560 // Conditional move reg-reg: reads src and the flags, writes dst
 4561 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 4562 %{
 4563     single_instruction;
 4564     dst    : S4(write);
 4565     src    : S3(read);
 4566     cr     : S3(read);
 4567     DECODE : S0;        // any decoder
 4568 %}
 4569 
 4570 // Conditional move reg-mem: reads the memory source and the flags, writes dst
 4571 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 4572 %{
 4573     single_instruction;
 4574     dst    : S4(write);
 4575     src    : S3(read);
 4576     cr     : S3(read);
 4577     DECODE : S0;        // any decoder
 4578     MEM    : S3;        // any mem
 4579 %}
 4580 
 4581 // Conditional move reg-reg long
 4582 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 4583 %{
 4584     single_instruction;  // NOTE(review): declared single_instruction although DECODE below has count 2
 4585     dst    : S4(write);
 4586     src    : S3(read);
 4587     cr     : S3(read);
 4588     DECODE : S0(2);     // any decoder, count 2
 4589 %}
 4590 
 4591 // XXX
 4592 // // Conditional move double reg-reg
 4593 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
 4594 // %{
 4595 //     single_instruction;
 4596 //     dst    : S4(write);
 4597 //     src    : S3(read);
 4598 //     cr     : S3(read);
 4599 //     DECODE : S0;     // any decoder
 4600 // %}
 4601 
 4602 // Float single-register operation, modelled as two instructions
 4603 pipe_class fpu_reg(regD dst)
 4604 %{
 4605     instruction_count(2);
 4606     dst    : S3(read);   // only a read of dst is declared; no write stage is listed
 4607     DECODE : S0(2);     // any decoder, count 2
 4608     FPU    : S3;
 4609 %}
 4610 
 4611 // Float reg-reg operation, modelled as two instructions
 4612 pipe_class fpu_reg_reg(regD dst, regD src)
 4613 %{
 4614     instruction_count(2);
 4615     dst    : S4(write);
 4616     src    : S3(read);
 4617     DECODE : S0(2);     // any decoder, count 2
 4618     FPU    : S3;
 4619 %}
 4620 
 4621 // Float reg-reg-reg operation (two sources), modelled as three instructions
 4622 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 4623 %{
 4624     instruction_count(3);
 4625     dst    : S4(write);
 4626     src1   : S3(read);
 4627     src2   : S3(read);
 4628     DECODE : S0(3);     // any decoder, count 3
 4629     FPU    : S3(2);
 4630 %}
 4631 
 4632 // Float operation with three register sources, modelled as four instructions
 4633 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 4634 %{
 4635     instruction_count(4);
 4636     dst    : S4(write);
 4637     src1   : S3(read);
 4638     src2   : S3(read);
 4639     src3   : S3(read);
 4640     DECODE : S0(4);     // any decoder, count 4 (matches instruction_count)
 4641     FPU    : S3(2);
 4642 %}
 4643 
 4644 // Float operation with one memory source and two register sources, modelled as four instructions
 4645 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 4646 %{
 4647     instruction_count(4);
 4648     dst    : S4(write);
 4649     src1   : S3(read);
 4650     src2   : S3(read);
 4651     src3   : S3(read);
 4652     DECODE : S1(3);     // any decoder, count 3, starting at stage S1
 4653     D0     : S0;        // Big decoder only
 4654     FPU    : S3(2);
 4655     MEM    : S3;        // any mem
 4656 %}
 4657 
 4658 // Float reg-mem operation (load-style): memory read in S3, FPU in S4, result written in S5
 4659 pipe_class fpu_reg_mem(regD dst, memory mem)
 4660 %{
 4661     instruction_count(2);
 4662     dst    : S5(write);
 4663     mem    : S3(read);
 4664     D0     : S0;        // big decoder only
 4665     DECODE : S1;        // any decoder for FPU POP
 4666     FPU    : S4;
 4667     MEM    : S3;        // any mem
 4668 %}
 4669 
 4670 // Float reg-reg-mem operation: one register source plus one memory source, three instructions
 4671 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 4672 %{
 4673     instruction_count(3);
 4674     dst    : S5(write);
 4675     src1   : S3(read);
 4676     mem    : S3(read);
 4677     D0     : S0;        // big decoder only
 4678     DECODE : S1(2);     // any decoder for FPU POP, count 2
 4679     FPU    : S4;
 4680     MEM    : S3;        // any mem
 4681 %}
 4682 
 4683 // Float mem-reg operation (store-style): register source read in S5, two instructions
 4684 pipe_class fpu_mem_reg(memory mem, regD src)
 4685 %{
 4686     instruction_count(2);
 4687     src    : S5(read);
 4688     mem    : S3(read);
 4689     DECODE : S0;        // any decoder for FPU PUSH
 4690     D0     : S1;        // big decoder only
 4691     FPU    : S4;
 4692     MEM    : S3;        // any mem
 4693 %}
 4694 
 4695 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)  // Float mem-reg-reg operation: two register sources and a memory operand
 4696 %{
 4697     instruction_count(3);
 4698     src1   : S3(read);
 4699     src2   : S3(read);
 4700     mem    : S3(read);
 4701     DECODE : S0(2);     // any decoder for FPU PUSH, count 2
 4702     D0     : S1;        // big decoder only
 4703     FPU    : S4;
 4704     MEM    : S3;        // any mem
 4705 %}
 4706 
 4707 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)  // Float operation with a register source and two memory operands
 4708 %{
 4709     instruction_count(3);
 4710     src1   : S3(read);
 4711     src2   : S3(read);
 4712     mem    : S4(read);
 4713     DECODE : S0;        // any decoder for FPU PUSH
 4714     D0     : S0(2);     // big decoder only, count 2
 4715     FPU    : S4;
 4716     MEM    : S3(2);     // any mem, count 2
 4717 %}
 4718 
 4719 pipe_class fpu_mem_mem(memory dst, memory src1)
 4720 %{
 4721     instruction_count(2); // Float mem-mem operation: two memory operands; no FPU resource line is listed
 4722     src1   : S3(read);  // memory source is read in stage S3
 4723     dst    : S4(read);  // NOTE(review): dst is declared with (read) access, not (write) - confirm intended
 4724     D0     : S0(2);     // big decoder only
 4725     MEM    : S3(2);     // any mem
 4726 %}
 4727 
 4728 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 4729 %{
 4730     instruction_count(3); // Float mem-mem-mem operation: three memory operands
 4731     src1   : S3(read);  // first memory source is read in stage S3
 4732     src2   : S3(read);  // second memory source is read in stage S3
 4733     dst    : S4(read);  // NOTE(review): dst is declared with (read) access, not (write) - confirm intended
 4734     D0     : S0(3);     // big decoder only
 4735     FPU    : S4;        // FPU resource is claimed in stage S4
 4736     MEM    : S3(3);     // any mem
 4737 %}
 4738 
 4739 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 4740 %{
 4741     instruction_count(3); // Float mem-reg-con operation: memory operand and one register source
 4742     src1   : S4(read);  // register source is read in stage S4
 4743     mem    : S4(read);  // memory operand is declared with read access in stage S4
 4744     DECODE : S0;        // any decoder for FPU PUSH
 4745     D0     : S0(2);     // big decoder only
 4746     FPU    : S4;        // FPU resource is claimed in stage S4
 4747     MEM    : S3(2);     // any mem
 4748 %}
 4749 
 4750 // Float load constant: only a register destination is declared as an operand
 4751 pipe_class fpu_reg_con(regD dst)
 4752 %{
 4753     instruction_count(2);
 4754     dst    : S5(write); // destination register is written in stage S5
 4755     D0     : S0;        // big decoder only for the load
 4756     DECODE : S1;        // any decoder for FPU POP
 4757     FPU    : S4;        // FPU resource is claimed in stage S4
 4758     MEM    : S3;        // any mem
 4759 %}
 4760 
 4761 // Float load constant combined with a register source (register destination)
 4762 pipe_class fpu_reg_reg_con(regD dst, regD src)
 4763 %{
 4764     instruction_count(3);
 4765     dst    : S5(write); // destination register is written in stage S5
 4766     src    : S3(read);  // register source is read in stage S3
 4767     D0     : S0;        // big decoder only for the load
 4768     DECODE : S1(2);     // any decoder for FPU POP
 4769     FPU    : S4;        // FPU resource is claimed in stage S4
 4770     MEM    : S3;        // any mem
 4771 %}
 4772 
 4773 // Unconditional branch: single instruction that only uses the branch resource
 4774 pipe_class pipe_jmp(label labl)
 4775 %{
 4776     single_instruction;
 4777     BR   : S3; // BR resource is claimed in stage S3
 4778 %}
 4779 
 4780 // Conditional branch: single instruction that reads the flags register before branching
 4781 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 4782 %{
 4783     single_instruction;
 4784     cr    : S1(read); // flags are read in stage S1
 4785     BR    : S3;       // BR resource is claimed in stage S3
 4786 %}
 4787 
 4788 // Allocation idiom: serialized single-instruction class with a fixed latency of 6
 4789 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 4790 %{
 4791     instruction_count(1); force_serialization;
 4792     fixed_latency(6);
 4793     heap_ptr : S3(read);  // heap pointer operand is read in stage S3
 4794     DECODE   : S0(3);
 4795     D0       : S2;
 4796     MEM      : S3;
 4797     ALU      : S3(2);
 4798     dst      : S5(write); // result operand is written in stage S5
 4799     BR       : S5;        // BR resource is claimed in the same stage as the dst write
 4800 %}
 4801 
 4802 // Generic big/slow expanded idiom: catch-all class with no operands, modelled as 10 instructions
 4803 pipe_class pipe_slow()
 4804 %{
 4805     instruction_count(10); multiple_bundles; force_serialization;
 4806     fixed_latency(100); // latency is a fixed 100 rather than derived from operand stages
 4807     D0  : S0(2);
 4808     MEM : S3(2);
 4809 %}
 4810 
 4811 // The real do-nothing guy: zero instructions, no operands and no resources
 4812 pipe_class empty()
 4813 %{
 4814     instruction_count(0);
 4815 %}
 4816 
 4817 // Define the class for the Nop node: MachNop is bound to the pipe class named "empty"
 4818 define
 4819 %{
 4820    MachNop = empty;
 4821 %}
 4822 
 4823 %}
 4824 
 4825 //----------INSTRUCTIONS-------------------------------------------------------
 4826 //
 4827 // match      -- States which machine-independent subtree may be replaced
 4828 //               by this instruction.
 4829 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4830 //               selection to identify a minimum cost tree of machine
 4831 //               instructions that matches a tree of machine-independent
 4832 //               instructions.
 4833 // format     -- A string providing the disassembly for this instruction.
 4834 //               The value of an instruction's operand may be inserted
 4835 //               by referring to it with a '$' prefix.
 4836 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4837 //               to within an encode class as $primary, $secondary, and $tertiary
 4838 //               respectively.  The primary opcode is commonly used to
 4839 //               indicate the type of machine instruction, while secondary
 4840 //               and tertiary are often used for prefix options or addressing
 4841 //               modes.
 4842 // ins_encode -- A list of encode classes with parameters. The encode class
 4843 //               name must have been defined in an 'enc_class' specification
 4844 //               in the encode section of the architecture description.
 4845 
 4846 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 4847 // Dummy float move regF -> vlRegF; the encoding is ShouldNotReachHere(), so it is never meant to be emitted
 4848 instruct MoveF2VL(vlRegF dst, regF src) %{
 4849   match(Set dst src);
 4850   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4851   ins_encode %{
 4852     ShouldNotReachHere(); // reaching the emitter with this node is treated as a fatal error
 4853   %}
 4854   ins_pipe( fpu_reg_reg );
 4855 %}
 4856 
 4857 // Dummy float move regF -> legRegF; the encoding is ShouldNotReachHere(), so it is never meant to be emitted
 4858 instruct MoveF2LEG(legRegF dst, regF src) %{
 4859   match(Set dst src);
 4860   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4861   ins_encode %{
 4862     ShouldNotReachHere(); // reaching the emitter with this node is treated as a fatal error
 4863   %}
 4864   ins_pipe( fpu_reg_reg );
 4865 %}
 4866 
 4867 // Dummy float move vlRegF -> regF; the encoding is ShouldNotReachHere(), so it is never meant to be emitted
 4868 instruct MoveVL2F(regF dst, vlRegF src) %{
 4869   match(Set dst src);
 4870   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4871   ins_encode %{
 4872     ShouldNotReachHere(); // reaching the emitter with this node is treated as a fatal error
 4873   %}
 4874   ins_pipe( fpu_reg_reg );
 4875 %}
 4876 
 4877 // Dummy float move legRegF -> regF; the encoding is ShouldNotReachHere(), so it is never meant to be emitted
 4878 instruct MoveLEG2F(regF dst, legRegF src) %{
 4879   match(Set dst src);
 4880   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4881   ins_encode %{
 4882     ShouldNotReachHere(); // reaching the emitter with this node is treated as a fatal error
 4883   %}
 4884   ins_pipe( fpu_reg_reg );
 4885 %}
 4886 
 4887 // Dummy double move regD -> vlRegD; the encoding is ShouldNotReachHere(), so it is never meant to be emitted
 4888 instruct MoveD2VL(vlRegD dst, regD src) %{
 4889   match(Set dst src);
 4890   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4891   ins_encode %{
 4892     ShouldNotReachHere(); // reaching the emitter with this node is treated as a fatal error
 4893   %}
 4894   ins_pipe( fpu_reg_reg );
 4895 %}
 4896 
 4897 // Dummy double move regD -> legRegD; the encoding is ShouldNotReachHere(), so it is never meant to be emitted
 4898 instruct MoveD2LEG(legRegD dst, regD src) %{
 4899   match(Set dst src);
 4900   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4901   ins_encode %{
 4902     ShouldNotReachHere(); // reaching the emitter with this node is treated as a fatal error
 4903   %}
 4904   ins_pipe( fpu_reg_reg );
 4905 %}
 4906 
 4907 // Dummy double move vlRegD -> regD; the encoding is ShouldNotReachHere(), so it is never meant to be emitted
 4908 instruct MoveVL2D(regD dst, vlRegD src) %{
 4909   match(Set dst src);
 4910   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4911   ins_encode %{
 4912     ShouldNotReachHere(); // reaching the emitter with this node is treated as a fatal error
 4913   %}
 4914   ins_pipe( fpu_reg_reg );
 4915 %}
 4916 
 4917 // Dummy double move legRegD -> regD; the encoding is ShouldNotReachHere(), so it is never meant to be emitted
 4918 instruct MoveLEG2D(regD dst, legRegD src) %{
 4919   match(Set dst src);
 4920   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4921   ins_encode %{
 4922     ShouldNotReachHere(); // reaching the emitter with this node is treated as a fatal error
 4923   %}
 4924   ins_pipe( fpu_reg_reg );
 4925 %}
 4926 
 4927 //----------Load/Store/Move Instructions---------------------------------------
 4928 //----------Load Instructions--------------------------------------------------
 4929 
 4930 // Load Byte (8 bit signed): matches LoadB into an int register and emits a single movsbl
 4931 instruct loadB(rRegI dst, memory mem)
 4932 %{
 4933   match(Set dst (LoadB mem));
 4934 
 4935   ins_cost(125);
 4936   format %{ "movsbl  $dst, $mem\t# byte" %}
 4937 
 4938   ins_encode %{
 4939     __ movsbl($dst$$Register, $mem$$Address);
 4940   %}
 4941 
 4942   ins_pipe(ialu_reg_mem);
 4943 %}
 4944 
 4945 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into a single movsbq
 4946 instruct loadB2L(rRegL dst, memory mem)
 4947 %{
 4948   match(Set dst (ConvI2L (LoadB mem)));
 4949 
 4950   ins_cost(125);
 4951   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 4952 
 4953   ins_encode %{
 4954     __ movsbq($dst$$Register, $mem$$Address);
 4955   %}
 4956 
 4957   ins_pipe(ialu_reg_mem);
 4958 %}
 4959 
 4960 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB into an int register and emits a single movzbl
 4961 instruct loadUB(rRegI dst, memory mem)
 4962 %{
 4963   match(Set dst (LoadUB mem));
 4964 
 4965   ins_cost(125);
 4966   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 4967 
 4968   ins_encode %{
 4969     __ movzbl($dst$$Register, $mem$$Address);
 4970   %}
 4971 
 4972   ins_pipe(ialu_reg_mem);
 4973 %}
 4974 
 4975 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into a single movzbq
 4976 instruct loadUB2L(rRegL dst, memory mem)
 4977 %{
 4978   match(Set dst (ConvI2L (LoadUB mem)));
 4979 
 4980   ins_cost(125);
 4981   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 4982 
 4983   ins_encode %{
 4984     __ movzbq($dst$$Register, $mem$$Address);
 4985   %}
 4986 
 4987   ins_pipe(ialu_reg_mem);
 4988 %}
 4989 
 4990 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register: movzbq followed by andl
 4991 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 4992   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 4993   effect(KILL cr); // flags are declared clobbered (the encoding emits an andl)
 4994 
 4995   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 4996             "andl    $dst, right_n_bits($mask, 8)" %}
 4997   ins_encode %{
 4998     Register Rdst = $dst$$Register;
 4999     __ movzbq(Rdst, $mem$$Address);
 5000     __ andl(Rdst, $mask$$constant & right_n_bits(8)); // mask constant is first restricted to its low 8 bits
 5001   %}
 5002   ins_pipe(ialu_reg_mem);
 5003 %}
 5004 
 5005 // Load Short (16 bit signed): matches LoadS into an int register and emits a single movswl
 5006 instruct loadS(rRegI dst, memory mem)
 5007 %{
 5008   match(Set dst (LoadS mem));
 5009 
 5010   ins_cost(125);
 5011   format %{ "movswl $dst, $mem\t# short" %}
 5012 
 5013   ins_encode %{
 5014     __ movswl($dst$$Register, $mem$$Address);
 5015   %}
 5016 
 5017   ins_pipe(ialu_reg_mem);
 5018 %}
 5019 
 5020 // Load Short (16 bit signed) to Byte (8 bit signed): replaces the (x << 24) >> 24 idiom on a LoadS with one movsbl
 5021 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5022   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5023 
 5024   ins_cost(125);
 5025   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 5026   ins_encode %{
 5027     __ movsbl($dst$$Register, $mem$$Address); // the shift operand is not used by the encoding
 5028   %}
 5029   ins_pipe(ialu_reg_mem);
 5030 %}
 5031 
 5032 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into a single movswq
 5033 instruct loadS2L(rRegL dst, memory mem)
 5034 %{
 5035   match(Set dst (ConvI2L (LoadS mem)));
 5036 
 5037   ins_cost(125);
 5038   format %{ "movswq $dst, $mem\t# short -> long" %}
 5039 
 5040   ins_encode %{
 5041     __ movswq($dst$$Register, $mem$$Address);
 5042   %}
 5043 
 5044   ins_pipe(ialu_reg_mem);
 5045 %}
 5046 
 5047 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS into an int register and emits a single movzwl
 5048 instruct loadUS(rRegI dst, memory mem)
 5049 %{
 5050   match(Set dst (LoadUS mem));
 5051 
 5052   ins_cost(125);
 5053   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 5054 
 5055   ins_encode %{
 5056     __ movzwl($dst$$Register, $mem$$Address);
 5057   %}
 5058 
 5059   ins_pipe(ialu_reg_mem);
 5060 %}
 5061 
 5062 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): (x << 24) >> 24 on a LoadUS becomes one movsbl
 5063 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5064   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5065 
 5066   ins_cost(125);
 5067   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 5068   ins_encode %{
 5069     __ movsbl($dst$$Register, $mem$$Address); // the shift operand is not used by the encoding
 5070   %}
 5071   ins_pipe(ialu_reg_mem);
 5072 %}
 5073 
 5074 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds ConvI2L(LoadUS) into a single movzwq
 5075 instruct loadUS2L(rRegL dst, memory mem)
 5076 %{
 5077   match(Set dst (ConvI2L (LoadUS mem)));
 5078 
 5079   ins_cost(125);
 5080   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 5081 
 5082   ins_encode %{
 5083     __ movzwq($dst$$Register, $mem$$Address);
 5084   %}
 5085 
 5086   ins_pipe(ialu_reg_mem);
 5087 %}
 5088 
 5089 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: the mask is folded into a movzbq
 5090 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 5091   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5092 
 5093   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 5094   ins_encode %{
 5095     __ movzbq($dst$$Register, $mem$$Address); // no separate and instruction is emitted for the mask
 5096   %}
 5097   ins_pipe(ialu_reg_mem);
 5098 %}
 5099 
 5100 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register: movzwq followed by andl
 5101 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 5102   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5103   effect(KILL cr); // flags are declared clobbered (the encoding emits an andl)
 5104 
 5105   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5106             "andl    $dst, right_n_bits($mask, 16)" %}
 5107   ins_encode %{
 5108     Register Rdst = $dst$$Register;
 5109     __ movzwq(Rdst, $mem$$Address);
 5110     __ andl(Rdst, $mask$$constant & right_n_bits(16)); // mask constant is first restricted to its low 16 bits
 5111   %}
 5112   ins_pipe(ialu_reg_mem);
 5113 %}
 5114 
 5115 // Load Integer: matches LoadI into an int register and emits a single movl
 5116 instruct loadI(rRegI dst, memory mem)
 5117 %{
 5118   match(Set dst (LoadI mem));
 5119 
 5120   ins_cost(125);
 5121   format %{ "movl    $dst, $mem\t# int" %}
 5122 
 5123   ins_encode %{
 5124     __ movl($dst$$Register, $mem$$Address);
 5125   %}
 5126 
 5127   ins_pipe(ialu_reg_mem);
 5128 %}
 5129 
 5130 // Load Integer (32 bit signed) to Byte (8 bit signed): (x << 24) >> 24 on a LoadI becomes one movsbl
 5131 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5132   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5133 
 5134   ins_cost(125);
 5135   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 5136   ins_encode %{
 5137     __ movsbl($dst$$Register, $mem$$Address); // the shift operand is not used by the encoding
 5138   %}
 5139   ins_pipe(ialu_reg_mem);
 5140 %}
 5141 
 5142 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): LoadI & 0xFF becomes one movzbl
 5143 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5144   match(Set dst (AndI (LoadI mem) mask));
 5145 
 5146   ins_cost(125);
 5147   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 5148   ins_encode %{
 5149     __ movzbl($dst$$Register, $mem$$Address); // the mask operand is not used by the encoding
 5150   %}
 5151   ins_pipe(ialu_reg_mem);
 5152 %}
 5153 
 5154 // Load Integer (32 bit signed) to Short (16 bit signed): (x << 16) >> 16 on a LoadI becomes one movswl
 5155 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5156   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5157 
 5158   ins_cost(125);
 5159   format %{ "movswl  $dst, $mem\t# int -> short" %}
 5160   ins_encode %{
 5161     __ movswl($dst$$Register, $mem$$Address); // the shift operand is not used by the encoding
 5162   %}
 5163   ins_pipe(ialu_reg_mem);
 5164 %}
 5165 
 5166 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): LoadI & 0xFFFF becomes one movzwl
 5167 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5168   match(Set dst (AndI (LoadI mem) mask));
 5169 
 5170   ins_cost(125);
 5171   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 5172   ins_encode %{
 5173     __ movzwl($dst$$Register, $mem$$Address); // the mask operand is not used by the encoding
 5174   %}
 5175   ins_pipe(ialu_reg_mem);
 5176 %}
 5177 
 5178 // Load Integer into Long Register: folds ConvI2L(LoadI) into a single movslq
 5179 instruct loadI2L(rRegL dst, memory mem)
 5180 %{
 5181   match(Set dst (ConvI2L (LoadI mem)));
 5182 
 5183   ins_cost(125);
 5184   format %{ "movslq  $dst, $mem\t# int -> long" %}
 5185 
 5186   ins_encode %{
 5187     __ movslq($dst$$Register, $mem$$Address);
 5188   %}
 5189 
 5190   ins_pipe(ialu_reg_mem);
 5191 %}
 5192 
 5193 // Load Integer with mask 0xFF into Long Register: the mask is folded into a movzbq
 5194 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 5195   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5196 
 5197   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 5198   ins_encode %{
 5199     __ movzbq($dst$$Register, $mem$$Address); // no separate and instruction is emitted for the mask
 5200   %}
 5201   ins_pipe(ialu_reg_mem);
 5202 %}
 5203 
 5204 // Load Integer with mask 0xFFFF into Long Register: the mask is folded into a movzwq
 5205 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 5206   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5207 
 5208   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 5209   ins_encode %{
 5210     __ movzwq($dst$$Register, $mem$$Address); // no separate and instruction is emitted for the mask
 5211   %}
 5212   ins_pipe(ialu_reg_mem);
 5213 %}
 5214 
 5215 // Load Integer with a 31-bit mask into Long Register: movl followed by andl with the mask constant
 5216 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 5217   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5218   effect(KILL cr); // flags are declared clobbered (the encoding emits an andl)
 5219 
 5220   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 5221             "andl    $dst, $mask" %}
 5222   ins_encode %{
 5223     Register Rdst = $dst$$Register;
 5224     __ movl(Rdst, $mem$$Address);
 5225     __ andl(Rdst, $mask$$constant); // mask is applied as-is, without further narrowing
 5226   %}
 5227   ins_pipe(ialu_reg_mem);
 5228 %}
 5229 
 5230 // Load Unsigned Integer into Long Register: ConvI2L(LoadI) masked with immL_32bits becomes a single movl
 5231 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 5232 %{
 5233   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5234 
 5235   ins_cost(125);
 5236   format %{ "movl    $dst, $mem\t# uint -> long" %}
 5237 
 5238   ins_encode %{
 5239     __ movl($dst$$Register, $mem$$Address); // the mask operand is not used by the encoding
 5240   %}
 5241 
 5242   ins_pipe(ialu_reg_mem);
 5243 %}
 5244 
 5245 // Load Long: matches LoadL into a long register and emits a single movq
 5246 instruct loadL(rRegL dst, memory mem)
 5247 %{
 5248   match(Set dst (LoadL mem));
 5249 
 5250   ins_cost(125);
 5251   format %{ "movq    $dst, $mem\t# long" %}
 5252 
 5253   ins_encode %{
 5254     __ movq($dst$$Register, $mem$$Address);
 5255   %}
 5256 
 5257   ins_pipe(ialu_reg_mem); // XXX
 5258 %}
 5259 
 5260 // Load Range: matches LoadRange into an int register and emits a single movl
 5261 instruct loadRange(rRegI dst, memory mem)
 5262 %{
 5263   match(Set dst (LoadRange mem));
 5264 
 5265   ins_cost(125); // XXX
 5266   format %{ "movl    $dst, $mem\t# range" %}
 5267   ins_encode %{
 5268     __ movl($dst$$Register, $mem$$Address);
 5269   %}
 5270   ins_pipe(ialu_reg_mem);
 5271 %}
 5272 
 5273 // Load Pointer: plain movq, selected only for LoadP nodes whose barrier_data() is 0
 5274 instruct loadP(rRegP dst, memory mem)
 5275 %{
 5276   match(Set dst (LoadP mem));
 5277   predicate(n->as_Load()->barrier_data() == 0);
 5278 
 5279   ins_cost(125); // XXX
 5280   format %{ "movq    $dst, $mem\t# ptr" %}
 5281   ins_encode %{
 5282     __ movq($dst$$Register, $mem$$Address);
 5283   %}
 5284   ins_pipe(ialu_reg_mem); // XXX
 5285 %}
 5286 
 5287 // Load Compressed Pointer: matches LoadN into a narrow-oop register and emits a 32-bit movl
 5288 instruct loadN(rRegN dst, memory mem)
 5289 %{
 5290    match(Set dst (LoadN mem));
 5291 
 5292    ins_cost(125); // XXX
 5293    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 5294    ins_encode %{
 5295      __ movl($dst$$Register, $mem$$Address);
 5296    %}
 5297    ins_pipe(ialu_reg_mem); // XXX
 5298 %}
 5299 
 5300 
 5301 // Load Klass Pointer: matches LoadKlass into a pointer register and emits a single movq
 5302 instruct loadKlass(rRegP dst, memory mem)
 5303 %{
 5304   match(Set dst (LoadKlass mem));
 5305 
 5306   ins_cost(125); // XXX
 5307   format %{ "movq    $dst, $mem\t# class" %}
 5308   ins_encode %{
 5309     __ movq($dst$$Register, $mem$$Address);
 5310   %}
 5311   ins_pipe(ialu_reg_mem); // XXX
 5312 %}
 5313 
 5314 // Load narrow Klass Pointer: plain 32-bit movl, selected only when UseCompactObjectHeaders is off
 5315 instruct loadNKlass(rRegN dst, memory mem)
 5316 %{
 5317   predicate(!UseCompactObjectHeaders);
 5318   match(Set dst (LoadNKlass mem));
 5319 
 5320   ins_cost(125); // XXX
 5321   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 5322   ins_encode %{
 5323     __ movl($dst$$Register, $mem$$Address);
 5324   %}
 5325   ins_pipe(ialu_reg_mem); // XXX
 5326 %}
 5327 
 5328 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 5329 %{
 5330   predicate(UseCompactObjectHeaders); // LoadNKlass variant selected when UseCompactObjectHeaders is on
 5331   match(Set dst (LoadNKlass mem));
 5332   effect(KILL cr); // flags are declared clobbered by the helper-generated code
 5333   ins_cost(125); // XXX
 5334   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 5335   ins_encode %{
 5336     Register index = $mem$$index != 4 ? $mem$$index$$Register : noreg; // NOTE(review): index encoding 4 presumably means "no index register" - confirm
 5337     Address::ScaleFactor sf = (index != noreg) ? static_cast<Address::ScaleFactor>($mem$$scale) : Address::no_scale; // scale is only meaningful with an index
 5338     __ load_nklass_compact_c2($dst$$Register, $mem$$base$$Register, index, sf, $mem$$disp); // code comes from this helper; the format text above only shows a movl
 5339   %}
 5340   ins_pipe(pipe_slow); // XXX
 5341 %}
 5342 
 5343 // Load Float: matches LoadF into an XMM register through the movflt macro-assembler helper
 5344 instruct loadF(regF dst, memory mem)
 5345 %{
 5346   match(Set dst (LoadF mem));
 5347 
 5348   ins_cost(145); // XXX
 5349   format %{ "movss   $dst, $mem\t# float" %}
 5350   ins_encode %{
 5351     __ movflt($dst$$XMMRegister, $mem$$Address);
 5352   %}
 5353   ins_pipe(pipe_slow); // XXX
 5354 %}
 5355 
 5356 // Load Double: variant selected when UseXmmLoadAndClearUpper is off (format shows movlpd)
 5357 instruct loadD_partial(regD dst, memory mem)
 5358 %{
 5359   predicate(!UseXmmLoadAndClearUpper);
 5360   match(Set dst (LoadD mem));
 5361 
 5362   ins_cost(145); // XXX
 5363   format %{ "movlpd  $dst, $mem\t# double" %}
 5364   ins_encode %{
 5365     __ movdbl($dst$$XMMRegister, $mem$$Address); // same helper call as loadD; presumably movdbl picks the instruction from the flag - confirm
 5366   %}
 5367   ins_pipe(pipe_slow); // XXX
 5368 %}
 5369 
 5370 instruct loadD(regD dst, memory mem)
 5371 %{
 5372   predicate(UseXmmLoadAndClearUpper); // Load Double variant selected when UseXmmLoadAndClearUpper is on
 5373   match(Set dst (LoadD mem));
 5374 
 5375   ins_cost(145); // XXX
 5376   format %{ "movsd   $dst, $mem\t# double" %}
 5377   ins_encode %{
 5378     __ movdbl($dst$$XMMRegister, $mem$$Address); // same helper call as loadD_partial; presumably movdbl picks the instruction from the flag - confirm
 5379   %}
 5380   ins_pipe(pipe_slow); // XXX
 5381 %}
 5382 
 5383 
 5384 // Following pseudo code describes the algorithm for max[FD]:
 5385 // Min algorithm is on similar lines
 5386 //  btmp = (b < +0.0) ? a : b
 5387 //  atmp = (b < +0.0) ? b : a
 5388 //  Tmp  = Max_Float(atmp , btmp)
 5389 //  Res  = (atmp == NaN) ? atmp : Tmp
 5390 
 5391 // max = java.lang.Math.max(float a, float b); AVX path used when the node is not a SuperWord reduction
 5392 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 5393   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5394   match(Set dst (MaxF a b));
 5395   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5396   format %{
 5397      "vblendvps        $btmp,$b,$a,$b           \n\t"
 5398      "vblendvps        $atmp,$a,$b,$b           \n\t"
 5399      "vmaxss           $tmp,$atmp,$btmp         \n\t"
 5400      "vcmpps.unordered $btmp,$atmp,$atmp        \n\t"
 5401      "vblendvps        $dst,$tmp,$atmp,$btmp    \n\t"
 5402   %}
 5403   ins_encode %{
 5404     int vector_len = Assembler::AVX_128bit;
 5405     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); // btmp = (b < +0.0) ? a : b
 5406     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); // atmp = (b < +0.0) ? b : a
 5407     __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister); // Tmp = Max_Float(atmp, btmp)
 5408     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); // self-compare of atmp (shown as "unordered" in format) builds the NaN mask in btmp
 5409     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); // Res = (atmp == NaN) ? atmp : Tmp
 5410  %}
 5411   ins_pipe( pipe_slow );
 5412 %}
 5413 
 5414 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
 5415   predicate(UseAVX > 0 && SuperWord::is_reduction(n)); // float max variant used when the node is a SuperWord reduction
 5416   match(Set dst (MaxF a b));
 5417   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr); // needs one XMM temp, one integer temp, and clobbers flags
 5418 
 5419   format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
 5420   ins_encode %{
 5421     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5422                     false /*min*/, true /*single*/); // shared helper, configured here for max on single precision
 5423   %}
 5424   ins_pipe( pipe_slow );
 5425 %}
 5426 
 5427 // max = java.lang.Math.max(double a, double b); AVX path used when the node is not a SuperWord reduction
 5428 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 5429   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5430   match(Set dst (MaxD a b));
 5431   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 5432   format %{
 5433      "vblendvpd        $btmp,$b,$a,$b            \n\t"
 5434      "vblendvpd        $atmp,$a,$b,$b            \n\t"
 5435      "vmaxsd           $tmp,$atmp,$btmp          \n\t"
 5436      "vcmppd.unordered $btmp,$atmp,$atmp         \n\t"
 5437      "vblendvpd        $dst,$tmp,$atmp,$btmp     \n\t"
 5438   %}
 5439   ins_encode %{
 5440     int vector_len = Assembler::AVX_128bit;
 5441     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); // btmp = (b < +0.0) ? a : b
 5442     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); // atmp = (b < +0.0) ? b : a
 5443     __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister); // Tmp = max(atmp, btmp)
 5444     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); // self-compare of atmp (shown as "unordered" in format) builds the NaN mask in btmp
 5445     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); // Res = (atmp == NaN) ? atmp : Tmp
 5446   %}
 5447   ins_pipe( pipe_slow );
 5448 %}
 5449 
 5450 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
 5451   predicate(UseAVX > 0 && SuperWord::is_reduction(n)); // double max variant used when the node is a SuperWord reduction
 5452   match(Set dst (MaxD a b));
 5453   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr); // needs one XMM temp, one integer temp, and clobbers flags
 5454 
 5455   format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
 5456   ins_encode %{
 5457     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5458                     false /*min*/, false /*single*/); // shared helper, configured here for max on double precision
 5459   %}
 5460   ins_pipe( pipe_slow );
 5461 %}
 5462 
 5463 // min = java.lang.Math.min(float a, float b); AVX path used when the node is not a SuperWord reduction
 5464 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 5465   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5466   match(Set dst (MinF a b));
 5467   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5468   format %{
 5469      "vblendvps        $atmp,$a,$b,$a             \n\t"
 5470      "vblendvps        $btmp,$b,$a,$a             \n\t"
 5471      "vminss           $tmp,$atmp,$btmp           \n\t"
 5472      "vcmpps.unordered $btmp,$atmp,$atmp          \n\t"
 5473      "vblendvps        $dst,$tmp,$atmp,$btmp      \n\t"
 5474   %}
 5475   ins_encode %{
 5476     int vector_len = Assembler::AVX_128bit;
 5477     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); // blend of a/b controlled by $a (the max rules use $b as the selector)
 5478     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); // complementary blend, again controlled by $a
 5479     __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister); // tmp = min(atmp, btmp)
 5480     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); // self-compare of atmp (shown as "unordered" in format) builds the NaN mask in btmp
 5481     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); // final select between tmp and atmp using that mask
 5482   %}
 5483   ins_pipe( pipe_slow );
 5484 %}
 5485 
 5486 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
 5487   predicate(UseAVX > 0 && SuperWord::is_reduction(n)); // float min variant used when the node is a SuperWord reduction
 5488   match(Set dst (MinF a b));
 5489   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr); // needs one XMM temp, one integer temp, and clobbers flags
 5490 
 5491   format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
 5492   ins_encode %{
 5493     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5494                     true /*min*/, true /*single*/); // shared helper, configured here for min on single precision
 5495   %}
 5496   ins_pipe( pipe_slow );
 5497 %}
 5498 
 5499 // min = java.lang.Math.min(double a, double b); AVX path used when the node is not a SuperWord reduction
 5500 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 5501   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5502   match(Set dst (MinD a b));
 5503   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5504   format %{
 5505      "vblendvpd        $atmp,$a,$b,$a           \n\t"
 5506      "vblendvpd        $btmp,$b,$a,$a           \n\t"
 5507      "vminsd           $tmp,$atmp,$btmp         \n\t"
 5508      "vcmppd.unordered $btmp,$atmp,$atmp        \n\t"
 5509      "vblendvpd        $dst,$tmp,$atmp,$btmp    \n\t"
 5510   %}
 5511   ins_encode %{
 5512     int vector_len = Assembler::AVX_128bit;
 5513     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); // blend of a/b controlled by $a (the max rules use $b as the selector)
 5514     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); // complementary blend, again controlled by $a
 5515     __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister); // tmp = min(atmp, btmp)
 5516     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); // self-compare of atmp (shown as "unordered" in format) builds the NaN mask in btmp
 5517     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); // final select between tmp and atmp using that mask
 5518   %}
 5519   ins_pipe( pipe_slow );
 5520 %}
 5521 
 5522 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
 5523   predicate(UseAVX > 0 && SuperWord::is_reduction(n)); // double min variant used when the node is a SuperWord reduction
 5524   match(Set dst (MinD a b));
 5525   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr); // needs one XMM temp, one integer temp, and clobbers flags
 5526 
 5527   format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
 5528   ins_encode %{
 5529     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5530                     true /*min*/, false /*single*/); // shared helper, configured here for min on double precision
 5531   %}
 5532   ins_pipe( pipe_slow );
 5533 %}
 5534 
 5535 // Load Effective Address: matches an indOffset8 address expression and materializes it with leaq
 5536 instruct leaP8(rRegP dst, indOffset8 mem)
 5537 %{
 5538   match(Set dst mem);
 5539 
 5540   ins_cost(110); // XXX
 5541   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 5542   ins_encode %{
 5543     __ leaq($dst$$Register, $mem$$Address);
 5544   %}
 5545   ins_pipe(ialu_reg_reg_fat);
 5546 %}
 5547 
 5548 instruct leaP32(rRegP dst, indOffset32 mem)
 5549 %{
 5550   match(Set dst mem);
 5551 
 5552   ins_cost(110); // leaq for the indOffset32 addressing form
 5553   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 5554   ins_encode %{
 5555     __ leaq($dst$$Register, $mem$$Address);
 5556   %}
 5557   ins_pipe(ialu_reg_reg_fat);
 5558 %}
 5559 
 5560 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 5561 %{
 5562   match(Set dst mem);
 5563 
 5564   ins_cost(110); // leaq for the indIndexOffset addressing form
 5565   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 5566   ins_encode %{
 5567     __ leaq($dst$$Register, $mem$$Address);
 5568   %}
 5569   ins_pipe(ialu_reg_reg_fat);
 5570 %}
 5571 
 5572 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 5573 %{
 5574   match(Set dst mem);
 5575 
 5576   ins_cost(110); // leaq for the indIndexScale addressing form
 5577   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 5578   ins_encode %{
 5579     __ leaq($dst$$Register, $mem$$Address);
 5580   %}
 5581   ins_pipe(ialu_reg_reg_fat);
 5582 %}
 5583 
 5584 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 5585 %{
 5586   match(Set dst mem);
 5587 
 5588   ins_cost(110); // leaq for the indPosIndexScale addressing form
 5589   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
 5590   ins_encode %{
 5591     __ leaq($dst$$Register, $mem$$Address);
 5592   %}
 5593   ins_pipe(ialu_reg_reg_fat); // the format tag is "posidxscale" so printed assembly can tell this rule apart from leaPIdxScale
 5594 %}
 5595 
 5596 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 5597 %{
 5598   match(Set dst mem);
 5599 
 5600   ins_cost(110); // leaq for the indIndexScaleOffset addressing form
 5601   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 5602   ins_encode %{
 5603     __ leaq($dst$$Register, $mem$$Address);
 5604   %}
 5605   ins_pipe(ialu_reg_reg_fat);
 5606 %}
 5607 
 5608 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 5609 %{
 5610   match(Set dst mem);
 5611 
 5612   ins_cost(110); // leaq for the indPosIndexOffset addressing form
 5613   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 5614   ins_encode %{
 5615     __ leaq($dst$$Register, $mem$$Address);
 5616   %}
 5617   ins_pipe(ialu_reg_reg_fat);
 5618 %}
 5619 
 5620 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 5621 %{
 5622   match(Set dst mem);
 5623 
 5624   ins_cost(110); // leaq for the indPosIndexScaleOffset addressing form
 5625   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 5626   ins_encode %{
 5627     __ leaq($dst$$Register, $mem$$Address);
 5628   %}
 5629   ins_pipe(ialu_reg_reg_fat);
 5630 %}
 5631 
 5632 // Load Effective Address which uses Narrow (32-bits) oop; only enabled with compressed oops and a non-zero shift
 5633 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 5634 %{
 5635   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 5636   match(Set dst mem);
 5637 
 5638   ins_cost(110);
 5639   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 5640   ins_encode %{
 5641     __ leaq($dst$$Register, $mem$$Address);
 5642   %}
 5643   ins_pipe(ialu_reg_reg_fat);
 5644 %}
 5645 
 5646 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 5647 %{
 5648   predicate(CompressedOops::shift() == 0); // only applies when the compressed-oop shift is zero
 5649   match(Set dst mem);
 5650 
 5651   ins_cost(110); // XXX
 5652   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 5653   ins_encode %{
 5654     __ leaq($dst$$Register, $mem$$Address); // leaq for the indOffset8Narrow addressing form
 5655   %}
 5656   ins_pipe(ialu_reg_reg_fat);
 5657 %}
 5658 
 5659 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 5660 %{
 5661   predicate(CompressedOops::shift() == 0); // only applies when the compressed-oop shift is zero
 5662   match(Set dst mem);
 5663 
 5664   ins_cost(110); // leaq for the indOffset32Narrow addressing form
 5665   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 5666   ins_encode %{
 5667     __ leaq($dst$$Register, $mem$$Address);
 5668   %}
 5669   ins_pipe(ialu_reg_reg_fat);
 5670 %}
 5671 
// Load Effective Address for the indIndexOffsetNarrow addressing mode.
// Selected only when the compressed-oop shift is zero.
instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5684 
// Load Effective Address for the indIndexScaleNarrow addressing mode.
// Selected only when the compressed-oop shift is zero.
instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5697 
// Load Effective Address for the indIndexScaleOffsetNarrow addressing mode.
// Selected only when the compressed-oop shift is zero.
instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5710 
// Load Effective Address for the indPosIndexOffsetNarrow addressing mode.
// Selected only when the compressed-oop shift is zero.
instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5723 
// Load Effective Address for the indPosIndexScaleOffsetNarrow addressing mode.
// Selected only when the compressed-oop shift is zero.
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5736 
// Load Int Constant: materialize an arbitrary immI into an int register with
// movl of an immediate. No explicit ins_cost is declared for this rule.
instruct loadConI(rRegI dst, immI src)
%{
  match(Set dst src);

  format %{ "movl    $dst, $src\t# int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
 5747 
// Load Int Constant zero: clears the destination by xor-ing it with itself.
// The flags register is declared killed, and the rule carries an explicit
// cost of 50.
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 5760 
// Load Long Constant (general case): materializes any immL via mov64.
// Its cost (150) is higher than the zero (50), unsigned-32-bit (60) and
// signed-32-bit (70) long-constant rules that follow.
instruct loadConL(rRegL dst, immL src)
%{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq    $dst, $src\t# long" %}
  ins_encode %{
    __ mov64($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 5772 
// Load Long Constant zero: clears the destination with a 32-bit xorl of the
// register with itself; the flags register is declared killed.
// NOTE(review): presumably relies on 32-bit register writes zero-extending to
// 64 bits on x86-64 -- confirm against the ISA manual.
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# long" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg); // XXX
%}
 5785 
// Load Long Constant that fits the immUL32 operand (per the format string, an
// unsigned 32-bit value): emitted as a 32-bit movl of the immediate.
// NOTE(review): presumably relies on movl zero-extending into the upper half
// of the 64-bit register -- confirm against the ISA manual.
instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 5797 
// Load Long Constant that fits the immL32 operand (per the format string, a
// 32-bit value): emitted as movq of the immediate.
instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq    $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 5809 
// Load Pointer Constant (general case): emits mov64 of the constant and passes
// along the operand's relocation type together with the RELOC_IMM64 format.
// No explicit ins_cost is declared for this rule.
instruct loadConP(rRegP dst, immP con) %{
  match(Set dst con);

  format %{ "movq    $dst, $con\t# ptr" %}
  ins_encode %{
    __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
 5819 
// Load Pointer Constant zero (immP0): clears the destination with xorl of the
// register with itself; the flags register is declared killed.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# ptr" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 5832 
// Load Pointer Constant that fits the immP31 operand (per the format string, a
// positive 32-bit value): emitted as a 32-bit movl of the immediate.
// NOTE(review): the rule declares KILL cr although the only emitted
// instruction is a movl of an immediate; confirm whether the flags kill is
// actually required before relying on or removing it.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 5845 
// Load Float Constant: the value is read with movflt from the address that the
// constantaddress construct yields for the constant operand (the format string
// describes this as a load from the constant table).
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
 5855 
 5856 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 5857   match(Set dst src);
 5858   effect(KILL cr);
 5859   format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
 5860   ins_encode %{
 5861     __ xorq($dst$$Register, $dst$$Register);
 5862   %}
 5863   ins_pipe(ialu_reg);
 5864 %}
 5865 
 5866 instruct loadConN(rRegN dst, immN src) %{
 5867   match(Set dst src);
 5868 
 5869   ins_cost(125);
 5870   format %{ "movl    $dst, $src\t# compressed ptr" %}
 5871   ins_encode %{
 5872     address con = (address)$src$$constant;
 5873     if (con == NULL) {
 5874       ShouldNotReachHere();
 5875     } else {
 5876       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 5877     }
 5878   %}
 5879   ins_pipe(ialu_reg_fat); // XXX
 5880 %}
 5881 
 5882 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 5883   match(Set dst src);
 5884 
 5885   ins_cost(125);
 5886   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 5887   ins_encode %{
 5888     address con = (address)$src$$constant;
 5889     if (con == NULL) {
 5890       ShouldNotReachHere();
 5891     } else {
 5892       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 5893     }
 5894   %}
 5895   ins_pipe(ialu_reg_fat); // XXX
 5896 %}
 5897 
// Load Float Constant 0.0 (immF0): clears the XMM destination with xorps of
// the register with itself. Cost 100 is lower than the constant-table load
// in loadConF (125).
instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps   $dst, $dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
 5909 
// Load Double Constant: the value is read with movdbl from the address that
// the constantaddress construct yields for the constant operand.
// Use the same format since predicate() can not be used here.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
 5920 
// Load Double Constant 0.0 (immD0): clears the XMM destination with xorpd of
// the register with itself. Cost 100 is lower than the constant-table load
// in loadConD (125).
instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
 5932 
// Load an int from a stack slot into a register.
// Uses the opcode/enc_class encoding form: REX prefix (non-wide), primary
// opcode 0x8B, then the reg_mem operand encoding.
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
 5943 
// Load a long from a stack slot into a register.
// Uses the opcode/enc_class encoding form: wide REX prefix, primary opcode
// 0x8B, then the reg_mem operand encoding.
instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
 5954 
// Load a pointer from a stack slot into a register.
// Uses the opcode/enc_class encoding form: wide REX prefix, primary opcode
// 0x8B, then the reg_mem operand encoding.
instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
 5965 
// Load a float from a stack slot into an XMM register. The slot is addressed
// as rsp plus the displacement of the stack-slot operand.
instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
 5977 
// Load a double from a stack slot into an XMM register. The slot is addressed
// as rsp plus the displacement of the stack-slot operand.
// Use the same format since predicate() can not be used here.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_encode  %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
 5990 
 5991 // Prefetch instructions for allocation.
 5992 // Must be safe to execute with invalid address (cannot fault).
 5993 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 3:
// emits prefetchw on the given memory operand.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
 6005 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 0:
// emits prefetchnta on the given memory operand.
instruct prefetchAllocNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
 6017 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 1:
// emits prefetcht0 on the given memory operand.
instruct prefetchAllocT0( memory mem ) %{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
 6029 
// Allocation prefetch variant selected when AllocatePrefetchInstr == 2:
// emits prefetcht2 on the given memory operand.
instruct prefetchAllocT2( memory mem ) %{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
 6041 
 6042 //----------Store Instructions-------------------------------------------------
 6043 
// Store Byte
// Matches StoreB of an int register and emits movb from that register to the
// memory operand.
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6056 
// Store Char/Short
// Matches StoreC of an int register and emits a 16-bit movw from that register
// to the memory operand.
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw    $mem, $src\t# char/short" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6069 
// Store Integer
// Matches StoreI of an int register and emits movl from that register to the
// memory operand.
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6082 
// Store Long
// Matches StoreL of a long register and emits movq from that register to the
// memory operand.
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}
 6095 
// Store Pointer
// Matches StoreP from any pointer register (any_RegP), but only when the store
// node carries no barrier data; emits a plain movq to the memory operand.
instruct storeP(memory mem, any_RegP src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6109 
// Store a NULL pointer by writing r12 instead of an immediate.
// Only selected when compressed oops are in use with a NULL heap base and the
// store carries no barrier data; per the format string, r12 is the heap-base
// register and therefore holds zero under that predicate.
instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6122 
// Store NULL Pointer, mark word, or other simple pointer constant.
// Accepts only constants matching the immP31 operand and only stores without
// barrier data; the constant is written with movq of an immediate. Cost 150
// is higher than the register-based pointer stores above (125).
instruct storeImmP(memory mem, immP31 src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6136 
// Store Compressed Pointer
// Matches StoreN of a narrow register and emits a 32-bit movl to the memory
// operand.
instruct storeN(memory mem, rRegN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6149 
// Store Compressed Klass Pointer: matches StoreNKlass of a narrow register and
// emits a 32-bit movl to the memory operand.
instruct storeNKlass(memory mem, rRegN src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6161 
// Store a compressed NULL pointer by writing the low 32 bits of r12.
// Only selected when the compressed-oop base is NULL; per the format string,
// r12 is the heap-base register and therefore holds zero in that case.
instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == NULL);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6174 
 6175 instruct storeImmN(memory mem, immN src)
 6176 %{
 6177   match(Set mem (StoreN mem src));
 6178 
 6179   ins_cost(150); // XXX
 6180   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6181   ins_encode %{
 6182     address con = (address)$src$$constant;
 6183     if (con == NULL) {
 6184       __ movl($mem$$Address, 0);
 6185     } else {
 6186       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 6187     }
 6188   %}
 6189   ins_pipe(ialu_mem_imm);
 6190 %}
 6191 
// Store a compressed klass constant directly to memory through
// set_narrow_klass. Unlike storeImmN there is no null check on the constant.
instruct storeImmNKlass(memory mem, immNKlass src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6203 
// Store Integer Immediate
// Zero case: stores the low 32 bits of r12 instead of an immediate. Only
// selected with compressed oops and a NULL heap base; per the format string,
// r12 is the heap-base register and therefore holds zero under that predicate.
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6217 
// Store an arbitrary int constant to memory with movl of an immediate.
// Cost 150 is higher than the r12-based zero store in storeImmI0 (125).
instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl    $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6229 
// Store Long Immediate
// Zero case: stores r12 with movq instead of an immediate. Only selected with
// compressed oops and a NULL heap base; per the format string, r12 is the
// heap-base register and therefore holds zero under that predicate.
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6243 
// Store a long constant to memory with movq of an immediate. Only constants
// matching the immL32 operand are accepted by this rule.
instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq    $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6255 
// Store Short/Char Immediate
// Zero case: stores the low 16 bits of r12 with movw instead of an immediate.
// Only selected with compressed oops and a NULL heap base; per the format
// string, r12 is the heap-base register and holds zero under that predicate.
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6269 
// Store a 16-bit constant (immI16) to memory with movw of an immediate.
// Enabled only when the UseStoreImmI16 flag is set.
instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw    $mem, $src\t# short/char" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6282 
 6283 // Store Byte Immediate
 6284 instruct storeImmB0(memory mem, immI_0 zero)
 6285 %{
 6286   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6287   match(Set mem (StoreB mem zero));
 6288 
 6289   ins_cost(125); // XXX
 6290   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 6291   ins_encode %{
 6292     __ movb($mem$$Address, r12);
 6293   %}
 6294   ins_pipe(ialu_mem_reg);
 6295 %}
 6296 
// Store an 8-bit constant (immI8) to memory with movb of an immediate.
instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6308 
// Store CMS card-mark Immediate
// Register form of the zero card-mark store: matches StoreCM of zero and
// writes the low byte of r12. Only selected with compressed oops and a NULL
// heap base; per the format string, r12 holds zero under that predicate.
instruct storeImmCM0_reg(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreCM mem zero));

  ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6322 
// Immediate form of the zero card-mark store: matches StoreCM of zero and
// emits movb of the immediate. It has no predicate and a higher cost (150)
// than the r12-based storeImmCM0_reg (125).
instruct storeImmCM0(memory mem, immI_0 src)
%{
  match(Set mem (StoreCM mem src));

  ins_cost(150); // XXX
  format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6334 
// Store Float
// Matches StoreF of an XMM register and emits movflt to the memory operand.
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss   $mem, $src\t# float" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6347 
// Store float 0.0 without going through an XMM register (original note: it is
// faster than store from XMM register). The zero is written as the low 32 bits
// of r12. Only selected with compressed oops and a NULL heap base; per the
// format string, r12 holds zero under that predicate. Cost 25 is the lowest
// of the StoreF rules here.
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6361 
// Store a float constant to memory as an integer immediate: the constant is
// passed through jint_cast and written with a 32-bit movl, so no XMM register
// is involved.
instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl    $mem, $src\t# float" %}
  ins_encode %{
    __ movl($mem$$Address, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
 6373 
// Store Double
// Matches StoreD of an XMM register and emits movdbl to the memory operand.
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd   $mem, $src\t# double" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6386 
// Store immediate double 0.0 (it is faster than store from XMM register)
// Immediate form: writes the constant with movq of an immediate. Its predicate
// is the exact negation of the one on storeD0, so it applies whenever
// compressed oops are off or the heap base is non-NULL.
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq    $mem, $src\t# double 0." %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6400 
// Store double 0.0 by writing r12 with movq. Only selected with compressed
// oops and a NULL heap base; per the format string, r12 is the heap-base
// register and therefore holds zero under that predicate.
instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6413 
// Store an int register to a stack slot.
// Uses the opcode/enc_class encoding form: REX prefix (non-wide), primary
// opcode 0x89, then the reg_mem operand encoding.
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe( ialu_mem_reg );
%}
 6424 
// Store a long register to a stack slot.
// Uses the opcode/enc_class encoding form: wide REX prefix, primary opcode
// 0x89, then the reg_mem operand encoding.
instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
 6435 
// Store a pointer register to a stack slot.
// Uses the opcode/enc_class encoding form: wide REX prefix, primary opcode
// 0x89, then the reg_mem operand encoding.
instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
 6446 
// Store a float XMM register to a stack slot. The slot is addressed as rsp
// plus the displacement of the stack-slot operand.
instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6458 
// Store a double XMM register to a stack slot. The slot is addressed as rsp
// plus the displacement of the stack-slot operand.
instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6470 
// Cache line write-back for the CacheWB node. Available only when the CPU
// reports support for data cache line flush. The address operand must be a
// plain base register: the encoding asserts that there is no index and that
// the displacement is zero, then calls cache_wb on [base + 0].
instruct cacheWB(indirect addr)
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    __ cache_wb(Address($addr$$base$$Register, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6485 
// Synchronization emitted for the CacheWBPreSync node: calls cache_wbsync with
// true (the pre-sync variant). Available only when the CPU reports support
// for data cache line flush.
instruct cacheWBPreSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    __ cache_wbsync(true);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6498 
// Synchronization emitted for the CacheWBPostSync node: calls cache_wbsync
// with false (the post-sync variant). Available only when the CPU reports
// support for data cache line flush.
instruct cacheWBPostSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6511 
 6512 //----------BSWAP Instructions-------------------------------------------------
// ReverseBytesI: byte-swaps an int register in place (source and destination
// are the same operand) with a single bswapl.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl  $dst" %}
  ins_encode %{
    __ bswapl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}
 6522 
// ReverseBytesL: byte-swaps a long register in place (source and destination
// are the same operand) with a single bswapq.
instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq  $dst" %}
  ins_encode %{
    __ bswapq($dst$$Register);
  %}
  ins_pipe( ialu_reg);
%}
 6532 
 6533 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 6534   match(Set dst (ReverseBytesUS dst));
 6535   effect(KILL cr);
 6536 
 6537   format %{ "bswapl  $dst\n\t"
 6538             "shrl    $dst,16\n\t" %}
 6539   ins_encode %{
 6540     __ bswapl($dst$$Register);
 6541     __ shrl($dst$$Register, 16);
 6542   %}
 6543   ins_pipe( ialu_reg );
 6544 %}
 6545 
 6546 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 6547   match(Set dst (ReverseBytesS dst));
 6548   effect(KILL cr);
 6549 
 6550   format %{ "bswapl  $dst\n\t"
 6551             "sar     $dst,16\n\t" %}
 6552   ins_encode %{
 6553     __ bswapl($dst$$Register);
 6554     __ sarl($dst$$Register, 16);
 6555   %}
 6556   ins_pipe( ialu_reg );
 6557 %}
 6558 
 6559 //---------- Zeros Count Instructions ------------------------------------------
 6560 
// CountLeadingZerosI, register source: a single lzcntl, used when
// UseCountLeadingZerosInstruction is set. The flags register is declared
// killed.
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 6572 
// CountLeadingZerosI with the int load folded in: lzcntl takes its source
// directly from memory. Used when UseCountLeadingZerosInstruction is set;
// the flags register is declared killed.
instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 6584 
 6585 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 6586   predicate(!UseCountLeadingZerosInstruction);
 6587   match(Set dst (CountLeadingZerosI src));
 6588   effect(KILL cr);
 6589 
 6590   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 6591             "jnz     skip\n\t"
 6592             "movl    $dst, -1\n"
 6593       "skip:\n\t"
 6594             "negl    $dst\n\t"
 6595             "addl    $dst, 31" %}
 6596   ins_encode %{
 6597     Register Rdst = $dst$$Register;
 6598     Register Rsrc = $src$$Register;
 6599     Label skip;
 6600     __ bsrl(Rdst, Rsrc);
 6601     __ jccb(Assembler::notZero, skip);
 6602     __ movl(Rdst, -1);
 6603     __ bind(skip);
 6604     __ negl(Rdst);
 6605     __ addl(Rdst, BitsPerInt - 1);
 6606   %}
 6607   ins_pipe(ialu_reg);
 6608 %}
 6609 
// CountLeadingZerosL, register source: a single lzcntq producing an int
// result, used when UseCountLeadingZerosInstruction is set. The flags register
// is declared killed.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 6621 
// CountLeadingZerosL with the long load folded in: lzcntq takes its source
// directly from memory. Used when UseCountLeadingZerosInstruction is set;
// the flags register is declared killed.
instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 6633 
 6634 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 6635   predicate(!UseCountLeadingZerosInstruction);
 6636   match(Set dst (CountLeadingZerosL src));
 6637   effect(KILL cr);
 6638 
 6639   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 6640             "jnz     skip\n\t"
 6641             "movl    $dst, -1\n"
 6642       "skip:\n\t"
 6643             "negl    $dst\n\t"
 6644             "addl    $dst, 63" %}
 6645   ins_encode %{
 6646     Register Rdst = $dst$$Register;
 6647     Register Rsrc = $src$$Register;
 6648     Label skip;
 6649     __ bsrq(Rdst, Rsrc);
 6650     __ jccb(Assembler::notZero, skip);
 6651     __ movl(Rdst, -1);
 6652     __ bind(skip);
 6653     __ negl(Rdst);
 6654     __ addl(Rdst, BitsPerLong - 1);
 6655   %}
 6656   ins_pipe(ialu_reg);
 6657 %}
 6658 
// CountTrailingZerosI, register source: a single tzcntl, used when
// UseCountTrailingZerosInstruction is set. The flags register is declared
// killed.
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 6670 
// CountTrailingZerosI with the int load folded in: tzcntl takes its source
// directly from memory. Used when UseCountTrailingZerosInstruction is set;
// the flags register is declared killed.
instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 6682 
 6683 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 6684   predicate(!UseCountTrailingZerosInstruction);
 6685   match(Set dst (CountTrailingZerosI src));
 6686   effect(KILL cr);
 6687 
 6688   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 6689             "jnz     done\n\t"
 6690             "movl    $dst, 32\n"
 6691       "done:" %}
 6692   ins_encode %{
 6693     Register Rdst = $dst$$Register;
 6694     Label done;
 6695     __ bsfl(Rdst, $src$$Register);
 6696     __ jccb(Assembler::notZero, done);
 6697     __ movl(Rdst, BitsPerInt);
 6698     __ bind(done);
 6699   %}
 6700   ins_pipe(ialu_reg);
 6701 %}
 6702 
// CountTrailingZerosL, register source: a single tzcntq producing an int
// result, used when UseCountTrailingZerosInstruction is set. The flags
// register is declared killed.
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 6714 
// CountTrailingZerosL with the long load folded in: tzcntq takes its source
// directly from memory. Used when UseCountTrailingZerosInstruction is set;
// the flags register is declared killed.
instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 6726 
 6727 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 6728   predicate(!UseCountTrailingZerosInstruction);
 6729   match(Set dst (CountTrailingZerosL src));
 6730   effect(KILL cr);
 6731 
 6732   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 6733             "jnz     done\n\t"
 6734             "movl    $dst, 64\n"
 6735       "done:" %}
 6736   ins_encode %{
 6737     Register Rdst = $dst$$Register;
 6738     Label done;
 6739     __ bsfq(Rdst, $src$$Register);
 6740     __ jccb(Assembler::notZero, done);
 6741     __ movl(Rdst, BitsPerLong);
 6742     __ bind(done);
 6743   %}
 6744   ins_pipe(ialu_reg);
 6745 %}
 6746 
 6747 //--------------- Reverse Operation Instructions ----------------
 6748 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 6749   predicate(!VM_Version::supports_gfni());
 6750   match(Set dst (ReverseI src));
 6751   effect(TEMP dst, TEMP rtmp, KILL cr);
 6752   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 6753   ins_encode %{
 6754     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 6755   %}
 6756   ins_pipe( ialu_reg );
 6757 %}
 6758 
 6759 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 6760   predicate(VM_Version::supports_gfni());
 6761   match(Set dst (ReverseI src));
 6762   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6763   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6764   ins_encode %{
 6765     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 6766   %}
 6767   ins_pipe( ialu_reg );
 6768 %}
 6769 
 6770 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 6771   predicate(!VM_Version::supports_gfni());
 6772   match(Set dst (ReverseL src));
 6773   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 6774   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 6775   ins_encode %{
 6776     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 6777   %}
 6778   ins_pipe( ialu_reg );
 6779 %}
 6780 
 6781 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 6782   predicate(VM_Version::supports_gfni());
 6783   match(Set dst (ReverseL src));
 6784   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6785   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6786   ins_encode %{
 6787     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 6788   %}
 6789   ins_pipe( ialu_reg );
 6790 %}
 6791 
 6792 //---------- Population Count Instructions -------------------------------------
 6793 
 6794 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 6795   predicate(UsePopCountInstruction);
 6796   match(Set dst (PopCountI src));
 6797   effect(KILL cr);
 6798 
 6799   format %{ "popcnt  $dst, $src" %}
 6800   ins_encode %{
 6801     __ popcntl($dst$$Register, $src$$Register);
 6802   %}
 6803   ins_pipe(ialu_reg);
 6804 %}
 6805 
 6806 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 6807   predicate(UsePopCountInstruction);
 6808   match(Set dst (PopCountI (LoadI mem)));
 6809   effect(KILL cr);
 6810 
 6811   format %{ "popcnt  $dst, $mem" %}
 6812   ins_encode %{
 6813     __ popcntl($dst$$Register, $mem$$Address);
 6814   %}
 6815   ins_pipe(ialu_reg);
 6816 %}
 6817 
 6818 // Note: Long.bitCount(long) returns an int.
 6819 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 6820   predicate(UsePopCountInstruction);
 6821   match(Set dst (PopCountL src));
 6822   effect(KILL cr);
 6823 
 6824   format %{ "popcnt  $dst, $src" %}
 6825   ins_encode %{
 6826     __ popcntq($dst$$Register, $src$$Register);
 6827   %}
 6828   ins_pipe(ialu_reg);
 6829 %}
 6830 
 6831 // Note: Long.bitCount(long) returns an int.
 6832 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 6833   predicate(UsePopCountInstruction);
 6834   match(Set dst (PopCountL (LoadL mem)));
 6835   effect(KILL cr);
 6836 
 6837   format %{ "popcnt  $dst, $mem" %}
 6838   ins_encode %{
 6839     __ popcntq($dst$$Register, $mem$$Address);
 6840   %}
 6841   ins_pipe(ialu_reg);
 6842 %}
 6843 
 6844 
 6845 //----------MemBar Instructions-----------------------------------------------
 6846 // Memory barrier flavors
 6847 
// Matches both MemBarAcquire and LoadFence. Zero cost, zero size and an empty
// encoding: no machine code is emitted for these nodes.
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6859 
// Matches MemBarAcquireLock. Emits nothing; per the format text the preceding
// CMPXCHG in FastLock is relied on for the ordering.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6870 
// Matches both MemBarRelease and StoreFence. Zero cost, zero size and an empty
// encoding: no machine code is emitted for these nodes.
instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6882 
// Matches MemBarReleaseLock. Emits nothing; per the format text the FastUnlock
// that follows is relied on for the ordering.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6893 
// The only membar rule in this group that emits code: a StoreLoad barrier,
// printed as a locked add to the top of stack. Kills the flags. Its cost of
// 400 leaves room for the zero-cost unnecessary_membar_volatile rule to be
// chosen instead whenever that rule's predicate holds.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    // The macro assembler picks the concrete instruction for the barrier.
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
 6908 
// Zero-cost, zero-size replacement for membar_volatile, selected when
// Matcher::post_store_load_barrier(n) reports that the barrier is redundant.
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6920 
// Matches both MemBarStoreStore and StoreStoreFence. Zero cost, zero size and
// an empty encoding: no machine code is emitted for these nodes.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 6931 
 6932 //----------Move Instructions--------------------------------------------------
 6933 
 6934 instruct castX2P(rRegP dst, rRegL src)
 6935 %{
 6936   match(Set dst (CastX2P src));
 6937 
 6938   format %{ "movq    $dst, $src\t# long->ptr" %}
 6939   ins_encode %{
 6940     if ($dst$$reg != $src$$reg) {
 6941       __ movptr($dst$$Register, $src$$Register);
 6942     }
 6943   %}
 6944   ins_pipe(ialu_reg_reg); // XXX
 6945 %}
 6946 
 6947 instruct castP2X(rRegL dst, rRegP src)
 6948 %{
 6949   match(Set dst (CastP2X src));
 6950 
 6951   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6952   ins_encode %{
 6953     if ($dst$$reg != $src$$reg) {
 6954       __ movptr($dst$$Register, $src$$Register);
 6955     }
 6956   %}
 6957   ins_pipe(ialu_reg_reg); // XXX
 6958 %}
 6959 
 6960 // Convert oop into int for vectors alignment masking
 6961 instruct convP2I(rRegI dst, rRegP src)
 6962 %{
 6963   match(Set dst (ConvL2I (CastP2X src)));
 6964 
 6965   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6966   ins_encode %{
 6967     __ movl($dst$$Register, $src$$Register);
 6968   %}
 6969   ins_pipe(ialu_reg_reg); // XXX
 6970 %}
 6971 
 6972 // Convert compressed oop into int for vectors alignment masking
 6973 // in case of 32bit oops (heap < 4Gb).
 6974 instruct convN2I(rRegI dst, rRegN src)
 6975 %{
 6976   predicate(CompressedOops::shift() == 0);
 6977   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6978 
 6979   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 6980   ins_encode %{
 6981     __ movl($dst$$Register, $src$$Register);
 6982   %}
 6983   ins_pipe(ialu_reg_reg); // XXX
 6984 %}
 6985 
 6986 // Convert oop pointer into compressed form
 6987 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 6988   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 6989   match(Set dst (EncodeP src));
 6990   effect(KILL cr);
 6991   format %{ "encode_heap_oop $dst,$src" %}
 6992   ins_encode %{
 6993     Register s = $src$$Register;
 6994     Register d = $dst$$Register;
 6995     if (s != d) {
 6996       __ movq(d, s);
 6997     }
 6998     __ encode_heap_oop(d);
 6999   %}
 7000   ins_pipe(ialu_reg_long);
 7001 %}
 7002 
 7003 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 7004   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 7005   match(Set dst (EncodeP src));
 7006   effect(KILL cr);
 7007   format %{ "encode_heap_oop_not_null $dst,$src" %}
 7008   ins_encode %{
 7009     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 7010   %}
 7011   ins_pipe(ialu_reg_long);
 7012 %}
 7013 
 7014 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 7015   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 7016             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 7017   match(Set dst (DecodeN src));
 7018   effect(KILL cr);
 7019   format %{ "decode_heap_oop $dst,$src" %}
 7020   ins_encode %{
 7021     Register s = $src$$Register;
 7022     Register d = $dst$$Register;
 7023     if (s != d) {
 7024       __ movq(d, s);
 7025     }
 7026     __ decode_heap_oop(d);
 7027   %}
 7028   ins_pipe(ialu_reg_long);
 7029 %}
 7030 
 7031 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 7032   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 7033             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 7034   match(Set dst (DecodeN src));
 7035   effect(KILL cr);
 7036   format %{ "decode_heap_oop_not_null $dst,$src" %}
 7037   ins_encode %{
 7038     Register s = $src$$Register;
 7039     Register d = $dst$$Register;
 7040     if (s != d) {
 7041       __ decode_heap_oop_not_null(d, s);
 7042     } else {
 7043       __ decode_heap_oop_not_null(d);
 7044     }
 7045   %}
 7046   ins_pipe(ialu_reg_long);
 7047 %}
 7048 
 7049 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 7050   match(Set dst (EncodePKlass src));
 7051   effect(TEMP dst, KILL cr);
 7052   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 7053   ins_encode %{
 7054     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7055   %}
 7056   ins_pipe(ialu_reg_long);
 7057 %}
 7058 
 7059 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 7060   match(Set dst (DecodeNKlass src));
 7061   effect(TEMP dst, KILL cr);
 7062   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 7063   ins_encode %{
 7064     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7065   %}
 7066   ins_pipe(ialu_reg_long);
 7067 %}
 7068 
 7069 //----------Conditional Move---------------------------------------------------
 7070 // Jump
 7071 // dummy instruction for generating temp registers
 7072 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 7073   match(Jump (LShiftL switch_val shift));
 7074   ins_cost(350);
 7075   predicate(false);
 7076   effect(TEMP dest);
 7077 
 7078   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7079             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 7080   ins_encode %{
 7081     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7082     // to do that and the compiler is using that register as one it can allocate.
 7083     // So we build it all by hand.
 7084     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 7085     // ArrayAddress dispatch(table, index);
 7086     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 7087     __ lea($dest$$Register, $constantaddress);
 7088     __ jmp(dispatch);
 7089   %}
 7090   ins_pipe(pipe_jmp);
 7091 %}
 7092 
 7093 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 7094   match(Jump (AddL (LShiftL switch_val shift) offset));
 7095   ins_cost(350);
 7096   effect(TEMP dest);
 7097 
 7098   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7099             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 7100   ins_encode %{
 7101     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7102     // to do that and the compiler is using that register as one it can allocate.
 7103     // So we build it all by hand.
 7104     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7105     // ArrayAddress dispatch(table, index);
 7106     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7107     __ lea($dest$$Register, $constantaddress);
 7108     __ jmp(dispatch);
 7109   %}
 7110   ins_pipe(pipe_jmp);
 7111 %}
 7112 
 7113 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 7114   match(Jump switch_val);
 7115   ins_cost(350);
 7116   effect(TEMP dest);
 7117 
 7118   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7119             "jmp     [$dest + $switch_val]\n\t" %}
 7120   ins_encode %{
 7121     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7122     // to do that and the compiler is using that register as one it can allocate.
 7123     // So we build it all by hand.
 7124     // Address index(noreg, switch_reg, Address::times_1);
 7125     // ArrayAddress dispatch(table, index);
 7126     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 7127     __ lea($dest$$Register, $constantaddress);
 7128     __ jmp(dispatch);
 7129   %}
 7130   ins_pipe(pipe_jmp);
 7131 %}
 7132 
 7133 // Conditional move
 7134 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7135 %{
 7136   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7137   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7138 
 7139   ins_cost(100); // XXX
 7140   format %{ "setbn$cop $dst\t# signed, int" %}
 7141   ins_encode %{
 7142     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7143     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7144   %}
 7145   ins_pipe(ialu_reg);
 7146 %}
 7147 
 7148 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 7149 %{
 7150   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7151 
 7152   ins_cost(200); // XXX
 7153   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7154   ins_encode %{
 7155     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7156   %}
 7157   ins_pipe(pipe_cmov_reg);
 7158 %}
 7159 
 7160 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7161 %{
 7162   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7163   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7164 
 7165   ins_cost(100); // XXX
 7166   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7167   ins_encode %{
 7168     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7169     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7170   %}
 7171   ins_pipe(ialu_reg);
 7172 %}
 7173 
 7174 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 7175   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7176 
 7177   ins_cost(200); // XXX
 7178   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7179   ins_encode %{
 7180     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7181   %}
 7182   ins_pipe(pipe_cmov_reg);
 7183 %}
 7184 
 7185 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7186 %{
 7187   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7188   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7189 
 7190   ins_cost(100); // XXX
 7191   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7192   ins_encode %{
 7193     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7194     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7195   %}
 7196   ins_pipe(ialu_reg);
 7197 %}
 7198 
// rFlagsRegUCF/cmpOpUCF flavour of cmovI_regU. It has no encoding of its own:
// the rule expands to cmovI_regU with the same operands.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
 7206 
 7207 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 7208   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7209   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7210 
 7211   ins_cost(200); // XXX
 7212   format %{ "cmovpl  $dst, $src\n\t"
 7213             "cmovnel $dst, $src" %}
 7214   ins_encode %{
 7215     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7216     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7217   %}
 7218   ins_pipe(pipe_cmov_reg);
 7219 %}
 7220 
 7221 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7222 // inputs of the CMove
 7223 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 7224   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7225   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7226 
 7227   ins_cost(200); // XXX
 7228   format %{ "cmovpl  $dst, $src\n\t"
 7229             "cmovnel $dst, $src" %}
 7230   ins_encode %{
 7231     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7232     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7233   %}
 7234   ins_pipe(pipe_cmov_reg);
 7235 %}
 7236 
 7237 // Conditional move
 7238 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 7239   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7240 
 7241   ins_cost(250); // XXX
 7242   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7243   ins_encode %{
 7244     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7245   %}
 7246   ins_pipe(pipe_cmov_mem);
 7247 %}
 7248 
 7249 // Conditional move
 7250 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 7251 %{
 7252   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7253 
 7254   ins_cost(250); // XXX
 7255   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7256   ins_encode %{
 7257     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7258   %}
 7259   ins_pipe(pipe_cmov_mem);
 7260 %}
 7261 
// rFlagsRegUCF/cmpOpUCF flavour of cmovI_memU. It has no encoding of its own:
// the rule expands to cmovI_memU with the same operands.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
 7269 
 7270 // Conditional move
 7271 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 7272 %{
 7273   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7274 
 7275   ins_cost(200); // XXX
 7276   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 7277   ins_encode %{
 7278     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7279   %}
 7280   ins_pipe(pipe_cmov_reg);
 7281 %}
 7282 
 7283 // Conditional move
 7284 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 7285 %{
 7286   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7287 
 7288   ins_cost(200); // XXX
 7289   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 7290   ins_encode %{
 7291     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7292   %}
 7293   ins_pipe(pipe_cmov_reg);
 7294 %}
 7295 
// rFlagsRegUCF/cmpOpUCF flavour of cmovN_regU. It has no encoding of its own:
// the rule expands to cmovN_regU with the same operands.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
 7303 
 7304 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 7305   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7306   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7307 
 7308   ins_cost(200); // XXX
 7309   format %{ "cmovpl  $dst, $src\n\t"
 7310             "cmovnel $dst, $src" %}
 7311   ins_encode %{
 7312     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7313     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7314   %}
 7315   ins_pipe(pipe_cmov_reg);
 7316 %}
 7317 
 7318 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7319 // inputs of the CMove
 7320 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 7321   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7322   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 7323 
 7324   ins_cost(200); // XXX
 7325   format %{ "cmovpl  $dst, $src\n\t"
 7326             "cmovnel $dst, $src" %}
 7327   ins_encode %{
 7328     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7329     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7330   %}
 7331   ins_pipe(pipe_cmov_reg);
 7332 %}
 7333 
 7334 // Conditional move
 7335 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 7336 %{
 7337   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7338 
 7339   ins_cost(200); // XXX
 7340   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 7341   ins_encode %{
 7342     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7343   %}
 7344   ins_pipe(pipe_cmov_reg);  // XXX
 7345 %}
 7346 
 7347 // Conditional move
 7348 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 7349 %{
 7350   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7351 
 7352   ins_cost(200); // XXX
 7353   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 7354   ins_encode %{
 7355     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7356   %}
 7357   ins_pipe(pipe_cmov_reg); // XXX
 7358 %}
 7359 
// rFlagsRegUCF/cmpOpUCF flavour of cmovP_regU. It has no encoding of its own:
// the rule expands to cmovP_regU with the same operands.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
 7367 
 7368 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 7369   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7370   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7371 
 7372   ins_cost(200); // XXX
 7373   format %{ "cmovpq  $dst, $src\n\t"
 7374             "cmovneq $dst, $src" %}
 7375   ins_encode %{
 7376     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7377     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7378   %}
 7379   ins_pipe(pipe_cmov_reg);
 7380 %}
 7381 
 7382 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7383 // inputs of the CMove
 7384 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 7385   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7386   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 7387 
 7388   ins_cost(200); // XXX
 7389   format %{ "cmovpq  $dst, $src\n\t"
 7390             "cmovneq $dst, $src" %}
 7391   ins_encode %{
 7392     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7393     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7394   %}
 7395   ins_pipe(pipe_cmov_reg);
 7396 %}
 7397 
 7398 // DISABLED: Requires the ADLC to emit a bottom_type call that
 7399 // correctly meets the two pointer arguments; one is an incoming
 7400 // register but the other is a memory operand.  ALSO appears to
 7401 // be buggy with implicit null checks.
 7402 //
 7403 //// Conditional move
 7404 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
 7405 //%{
 7406 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7407 //  ins_cost(250);
 7408 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7409 //  opcode(0x0F,0x40);
 7410 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7411 //  ins_pipe( pipe_cmov_mem );
 7412 //%}
 7413 //
 7414 //// Conditional move
 7415 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
 7416 //%{
 7417 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7418 //  ins_cost(250);
 7419 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7420 //  opcode(0x0F,0x40);
 7421 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7422 //  ins_pipe( pipe_cmov_mem );
 7423 //%}
 7424 
 7425 instruct cmovL_imm_01(rRegL dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7426 %{
 7427   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7428   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7429 
 7430   ins_cost(100); // XXX
 7431   format %{ "setbn$cop $dst\t# signed, long" %}
 7432   ins_encode %{
 7433     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7434     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7435   %}
 7436   ins_pipe(ialu_reg);
 7437 %}
 7438 
 7439 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 7440 %{
 7441   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7442 
 7443   ins_cost(200); // XXX
 7444   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7445   ins_encode %{
 7446     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7447   %}
 7448   ins_pipe(pipe_cmov_reg);  // XXX
 7449 %}
 7450 
 7451 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 7452 %{
 7453   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7454 
 7455   ins_cost(200); // XXX
 7456   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7457   ins_encode %{
 7458     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7459   %}
 7460   ins_pipe(pipe_cmov_mem);  // XXX
 7461 %}
 7462 
 7463 instruct cmovL_imm_01U(rRegL dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7464 %{
 7465   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7466   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7467 
 7468   ins_cost(100); // XXX
 7469   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7470   ins_encode %{
 7471     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7472     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7473   %}
 7474   ins_pipe(ialu_reg);
 7475 %}
 7476 
 7477 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 7478 %{
 7479   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7480 
 7481   ins_cost(200); // XXX
 7482   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7483   ins_encode %{
 7484     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7485   %}
 7486   ins_pipe(pipe_cmov_reg); // XXX
 7487 %}
 7488 
 7489 instruct cmovL_imm_01UCF(rRegL dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7490 %{
 7491   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7492   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7493 
 7494   ins_cost(100); // XXX
 7495   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7496   ins_encode %{
 7497     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7498     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7499   %}
 7500   ins_pipe(ialu_reg);
 7501 %}
 7502 
// rFlagsRegUCF/cmpOpUCF flavour of cmovL_regU. It has no encoding of its own:
// the rule expands to cmovL_regU with the same operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
 7510 
 7511 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 7512   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7513   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7514 
 7515   ins_cost(200); // XXX
 7516   format %{ "cmovpq  $dst, $src\n\t"
 7517             "cmovneq $dst, $src" %}
 7518   ins_encode %{
 7519     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7520     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7521   %}
 7522   ins_pipe(pipe_cmov_reg);
 7523 %}
 7524 
 7525 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7526 // inputs of the CMove
 7527 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 7528   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7529   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7530 
 7531   ins_cost(200); // XXX
 7532   format %{ "cmovpq  $dst, $src\n\t"
 7533             "cmovneq $dst, $src" %}
 7534   ins_encode %{
 7535     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7536     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7537   %}
 7538   ins_pipe(pipe_cmov_reg);
 7539 %}
 7540 
 7541 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 7542 %{
 7543   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7544 
 7545   ins_cost(200); // XXX
 7546   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7547   ins_encode %{
 7548     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7549   %}
 7550   ins_pipe(pipe_cmov_mem); // XXX
 7551 %}
 7552 
// rFlagsRegUCF/cmpOpUCF flavour of cmovL_memU. It has no encoding of its own:
// the rule expands to cmovL_memU with the same operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
 7560 
 7561 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 7562 %{
 7563   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7564 
 7565   ins_cost(200); // XXX
 7566   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7567             "movss     $dst, $src\n"
 7568     "skip:" %}
 7569   ins_encode %{
 7570     Label Lskip;
 7571     // Invert sense of branch from sense of CMOV
 7572     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7573     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7574     __ bind(Lskip);
 7575   %}
 7576   ins_pipe(pipe_slow);
 7577 %}
 7578 
 7579 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
 7580 // %{
 7581 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
 7582 
 7583 //   ins_cost(200); // XXX
 7584 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7585 //             "movss     $dst, $src\n"
 7586 //     "skip:" %}
 7587 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
 7588 //   ins_pipe(pipe_slow);
 7589 // %}
 7590 
 7591 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 7592 %{
 7593   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7594 
 7595   ins_cost(200); // XXX
 7596   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 7597             "movss     $dst, $src\n"
 7598     "skip:" %}
 7599   ins_encode %{
 7600     Label Lskip;
 7601     // Invert sense of branch from sense of CMOV
 7602     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7603     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7604     __ bind(Lskip);
 7605   %}
 7606   ins_pipe(pipe_slow);
 7607 %}
 7608 
// rFlagsRegUCF/cmpOpUCF flavour of cmovF_regU. It has no encoding of its own:
// the rule expands to cmovF_regU with the same operands.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
 7616 
 7617 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 7618 %{
        // Conditional move of a double between XMM registers, controlled by a
        // cmpOp condition on rFlagsReg. dst is both the "keep" input of CMoveD
        // and the result; it is overwritten with src only when $cop holds.
        // Encoded as a short branch over a register move.
 7619   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7620 
 7621   ins_cost(200); // XXX
 7622   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 7623             "movsd     $dst, $src\n"
 7624     "skip:" %}
 7625   ins_encode %{
 7626     Label Lskip;
 7627     // Invert sense of branch from sense of CMOV
          // (cmpcode ^ 1 is the condition under which the move is skipped)
 7628     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7629     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7630     __ bind(Lskip);
 7631   %}
 7632   ins_pipe(pipe_slow);
 7633 %}
 7634 
 7635 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 7636 %{
        // Double conditional move between XMM registers for the unsigned
        // condition/flags operand pair (cmpOpU / rFlagsRegU). Same code shape
        // as the signed rule: branch over the move when the condition fails.
 7637   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7638 
 7639   ins_cost(200); // XXX
 7640   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 7641             "movsd     $dst, $src\n"
 7642     "skip:" %}
 7643   ins_encode %{
 7644     Label Lskip;
 7645     // Invert sense of branch from sense of CMOV
          // (cmpcode ^ 1 is the condition under which the move is skipped)
 7646     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7647     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7648     __ bind(Lskip);
 7649   %}
 7650   ins_pipe(pipe_slow);
 7651 %}
 7652 
 7653 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
        // Double conditional move for the UCF condition/flags operand pair.
        // Emits nothing itself: expands to cmovD_regU with the same operands.
 7654   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7655   ins_cost(200);
 7656   expand %{
 7657     cmovD_regU(cop, cr, dst, src);
 7658   %}
 7659 %}
 7660 
 7661 //----------Arithmetic Instructions--------------------------------------------
 7662 //----------Addition Instructions----------------------------------------------
 7663 
 7664 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 7665 %{
        // 32-bit integer add, register += register.
        // Two-address form: dst is the left input of AddI and also the result.
        // The flags register is declared clobbered via KILL cr.
 7666   match(Set dst (AddI dst src));
 7667   effect(KILL cr);
 7668 
 7669   format %{ "addl    $dst, $src\t# int" %}
 7670   ins_encode %{
 7671     __ addl($dst$$Register, $src$$Register);
 7672   %}
 7673   ins_pipe(ialu_reg_reg);
 7674 %}
 7675 
 7676 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 7677 %{
        // 32-bit integer add, register += immediate constant.
        // dst is both input and result; flags are declared clobbered.
 7678   match(Set dst (AddI dst src));
 7679   effect(KILL cr);
 7680 
 7681   format %{ "addl    $dst, $src\t# int" %}
 7682   ins_encode %{
 7683     __ addl($dst$$Register, $src$$constant);
 7684   %}
 7685   ins_pipe( ialu_reg );
 7686 %}
 7687 
 7688 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 7689 %{
        // 32-bit integer add, register += value loaded from memory.
        // Folds the LoadI into the add's memory operand; flags are clobbered.
 7690   match(Set dst (AddI dst (LoadI src)));
 7691   effect(KILL cr);
 7692 
 7693   ins_cost(150); // XXX
 7694   format %{ "addl    $dst, $src\t# int" %}
 7695   ins_encode %{
 7696     __ addl($dst$$Register, $src$$Address);
 7697   %}
 7698   ins_pipe(ialu_reg_mem);
 7699 %}
 7700 
 7701 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 7702 %{
        // 32-bit read-modify-write add: memory += register.
        // Matches a StoreI of (LoadI dst + src) back to the same dst operand
        // and emits a single add with a memory destination. Flags clobbered.
 7703   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7704   effect(KILL cr);
 7705 
 7706   ins_cost(150); // XXX
 7707   format %{ "addl    $dst, $src\t# int" %}
 7708   ins_encode %{
 7709     __ addl($dst$$Address, $src$$Register);
 7710   %}
 7711   ins_pipe(ialu_mem_reg);
 7712 %}
 7713 
 7714 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 7715 %{
        // 32-bit read-modify-write add: memory += immediate constant.
        // Matches a StoreI of (LoadI dst + src) back to the same dst operand.
        // Flags are declared clobbered.
 7716   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7717   effect(KILL cr);
 7718 
 7719   ins_cost(125); // XXX
 7720   format %{ "addl    $dst, $src\t# int" %}
 7721   ins_encode %{
 7722     __ addl($dst$$Address, $src$$constant);
 7723   %}
 7724   ins_pipe(ialu_mem_imm);
 7725 %}
 7726 
 7727 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 7728 %{
        // 32-bit register increment: matches AddI of dst with an immI_1
        // operand. Only selectable when the UseIncDec flag is set. The src
        // operand takes part in matching only; the encoding does not read it.
 7729   predicate(UseIncDec);
 7730   match(Set dst (AddI dst src));
 7731   effect(KILL cr);
 7732 
 7733   format %{ "incl    $dst\t# int" %}
 7734   ins_encode %{
 7735     __ incrementl($dst$$Register);
 7736   %}
 7737   ins_pipe(ialu_reg);
 7738 %}
 7739 
 7740 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 7741 %{
        // 32-bit in-memory increment: matches a StoreI of (LoadI dst + immI_1)
        // back to the same dst operand. Only selectable when UseIncDec is set.
        // The src operand is used for matching only, not by the encoding.
 7742   predicate(UseIncDec);
 7743   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7744   effect(KILL cr);
 7745 
 7746   ins_cost(125); // XXX
 7747   format %{ "incl    $dst\t# int" %}
 7748   ins_encode %{
 7749     __ incrementl($dst$$Address);
 7750   %}
 7751   ins_pipe(ialu_mem_imm);
 7752 %}
 7753 
 7754 // XXX why does that use AddI
 7755 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
 7756 %{
        // 32-bit register decrement, expressed as AddI of dst with an immI_M1
        // operand (presumably the constant -1; the operand is defined
        // elsewhere in this file). Only selectable when UseIncDec is set.
        // The src operand is used for matching only, not by the encoding.
 7757   predicate(UseIncDec);
 7758   match(Set dst (AddI dst src));
 7759   effect(KILL cr);
 7760 
 7761   format %{ "decl    $dst\t# int" %}
 7762   ins_encode %{
 7763     __ decrementl($dst$$Register);
 7764   %}
 7765   ins_pipe(ialu_reg);
 7766 %}
 7767 
 7768 // XXX why does that use AddI
 7769 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
 7770 %{
        // 32-bit in-memory decrement, expressed as a StoreI of
        // (LoadI dst + immI_M1) back to the same dst operand (immI_M1 is
        // presumably the constant -1; defined elsewhere in this file).
        // Only selectable when UseIncDec is set.
 7771   predicate(UseIncDec);
 7772   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7773   effect(KILL cr);
 7774 
 7775   ins_cost(125); // XXX
 7776   format %{ "decl    $dst\t# int" %}
 7777   ins_encode %{
 7778     __ decrementl($dst$$Address);
 7779   %}
 7780   ins_pipe(ialu_mem_imm);
 7781 %}
 7782 
 7783 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
 7784 %{
        // Computes dst = (index << scale) + disp for ints with a single leal.
        // Three-address form: dst is a separate operand from the inputs, and
        // no flags operand or KILL is declared. Only selectable when
        // VM_Version::supports_fast_2op_lea() is true.
 7785   predicate(VM_Version::supports_fast_2op_lea());
 7786   match(Set dst (AddI (LShiftI index scale) disp));
 7787 
 7788   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
 7789   ins_encode %{
          // The shift count is reused directly as the address scale factor;
          // noreg means the address has no base register.
 7790     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7791     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7792   %}
 7793   ins_pipe(ialu_reg_reg);
 7794 %}
 7795 
 7796 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
 7797 %{
        // Computes dst = base + index + disp for ints with a single leal
        // (index scaled by 1). dst is a separate operand from the inputs and
        // no flags KILL is declared. Only selectable when
        // VM_Version::supports_fast_3op_lea() is true.
 7798   predicate(VM_Version::supports_fast_3op_lea());
 7799   match(Set dst (AddI (AddI base index) disp));
 7800 
 7801   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
 7802   ins_encode %{
 7803     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7804   %}
 7805   ins_pipe(ialu_reg_reg);
 7806 %}
 7807 
 7808 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
 7809 %{
        // Computes dst = base + (index << scale) for ints with a single leal
        // and no displacement. Only selectable when
        // VM_Version::supports_fast_2op_lea() is true.
        // NOTE(review): base uses the no_rbp_r13_RegI operand class --
        // presumably because rbp/r13 as a base cannot be encoded without a
        // displacement byte; confirm against the operand definition.
 7810   predicate(VM_Version::supports_fast_2op_lea());
 7811   match(Set dst (AddI base (LShiftI index scale)));
 7812 
 7813   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
 7814   ins_encode %{
          // The shift count is reused directly as the address scale factor.
 7815     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7816     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
 7817   %}
 7818   ins_pipe(ialu_reg_reg);
 7819 %}
 7820 
 7821 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
 7822 %{
        // Computes dst = base + (index << scale) + disp for ints with a single
        // leal using all address components. Only selectable when
        // VM_Version::supports_fast_3op_lea() is true. No flags KILL declared.
 7823   predicate(VM_Version::supports_fast_3op_lea());
 7824   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
 7825 
 7826   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
 7827   ins_encode %{
          // The shift count is reused directly as the address scale factor.
 7828     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7829     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 7830   %}
 7831   ins_pipe(ialu_reg_reg);
 7832 %}
 7833 
 7834 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 7835 %{
        // 64-bit long add, register += register.
        // Two-address form: dst is the left input of AddL and also the result.
        // The flags register is declared clobbered via KILL cr.
 7836   match(Set dst (AddL dst src));
 7837   effect(KILL cr);
 7838 
 7839   format %{ "addq    $dst, $src\t# long" %}
 7840   ins_encode %{
 7841     __ addq($dst$$Register, $src$$Register);
 7842   %}
 7843   ins_pipe(ialu_reg_reg);
 7844 %}
 7845 
 7846 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 7847 %{
        // 64-bit long add, register += immediate. The constant is restricted
        // to the immL32 operand class. dst is both input and result; flags
        // are declared clobbered.
 7848   match(Set dst (AddL dst src));
 7849   effect(KILL cr);
 7850 
 7851   format %{ "addq    $dst, $src\t# long" %}
 7852   ins_encode %{
 7853     __ addq($dst$$Register, $src$$constant);
 7854   %}
 7855   ins_pipe( ialu_reg );
 7856 %}
 7857 
 7858 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 7859 %{
        // 64-bit long add, register += value loaded from memory.
        // Folds the LoadL into the add's memory operand; flags are clobbered.
 7860   match(Set dst (AddL dst (LoadL src)));
 7861   effect(KILL cr);
 7862 
 7863   ins_cost(150); // XXX
 7864   format %{ "addq    $dst, $src\t# long" %}
 7865   ins_encode %{
 7866     __ addq($dst$$Register, $src$$Address);
 7867   %}
 7868   ins_pipe(ialu_reg_mem);
 7869 %}
 7870 
 7871 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 7872 %{
        // 64-bit read-modify-write add: memory += register.
        // Matches a StoreL of (LoadL dst + src) back to the same dst operand
        // and emits a single add with a memory destination. Flags clobbered.
 7873   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7874   effect(KILL cr);
 7875 
 7876   ins_cost(150); // XXX
 7877   format %{ "addq    $dst, $src\t# long" %}
 7878   ins_encode %{
 7879     __ addq($dst$$Address, $src$$Register);
 7880   %}
 7881   ins_pipe(ialu_mem_reg);
 7882 %}
 7883 
 7884 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
 7885 %{
        // 64-bit read-modify-write add: memory += immediate (immL32 operand
        // class). Matches a StoreL of (LoadL dst + src) back to the same dst
        // operand. Flags are declared clobbered.
 7886   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7887   effect(KILL cr);
 7888 
 7889   ins_cost(125); // XXX
 7890   format %{ "addq    $dst, $src\t# long" %}
 7891   ins_encode %{
 7892     __ addq($dst$$Address, $src$$constant);
 7893   %}
 7894   ins_pipe(ialu_mem_imm);
 7895 %}
 7896 
 7897 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
 7898 %{
        // 64-bit register increment: matches AddL of dst with an immL1
        // operand. Only selectable when the UseIncDec flag is set. The src
        // operand takes part in matching only; the encoding does not read it.
        // dst is a long register operand (rRegL), consistent with the AddL
        // match, the 64-bit incrementq encoding and the sibling decL_rReg rule.
 7899   predicate(UseIncDec);
 7900   match(Set dst (AddL dst src));
 7901   effect(KILL cr);
 7902 
 7903   format %{ "incq    $dst\t# long" %}
 7904   ins_encode %{
 7905     __ incrementq($dst$$Register);
 7906   %}
 7907   ins_pipe(ialu_reg);
 7908 %}
 7909 
 7910 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
 7911 %{
        // 64-bit in-memory increment: matches a StoreL of (LoadL dst + immL1)
        // back to the same dst operand. Only selectable when UseIncDec is set.
        // The src operand is used for matching only, not by the encoding.
 7912   predicate(UseIncDec);
 7913   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7914   effect(KILL cr);
 7915 
 7916   ins_cost(125); // XXX
 7917   format %{ "incq    $dst\t# long" %}
 7918   ins_encode %{
 7919     __ incrementq($dst$$Address);
 7920   %}
 7921   ins_pipe(ialu_mem_imm);
 7922 %}
 7923 
 7924 // XXX why does that use AddL
 7925 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
 7926 %{
        // 64-bit register decrement, expressed as AddL of dst with an immL_M1
        // operand (presumably the constant -1; the operand is defined
        // elsewhere in this file). Only selectable when UseIncDec is set.
        // The src operand is used for matching only, not by the encoding.
 7927   predicate(UseIncDec);
 7928   match(Set dst (AddL dst src));
 7929   effect(KILL cr);
 7930 
 7931   format %{ "decq    $dst\t# long" %}
 7932   ins_encode %{
 7933     __ decrementq($dst$$Register);
 7934   %}
 7935   ins_pipe(ialu_reg);
 7936 %}
 7937 
 7938 // XXX why does that use AddL
 7939 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
 7940 %{
        // 64-bit in-memory decrement, expressed as a StoreL of
        // (LoadL dst + immL_M1) back to the same dst operand (immL_M1 is
        // presumably the constant -1; defined elsewhere in this file).
        // Only selectable when UseIncDec is set.
 7941   predicate(UseIncDec);
 7942   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7943   effect(KILL cr);
 7944 
 7945   ins_cost(125); // XXX
 7946   format %{ "decq    $dst\t# long" %}
 7947   ins_encode %{
 7948     __ decrementq($dst$$Address);
 7949   %}
 7950   ins_pipe(ialu_mem_imm);
 7951 %}
 7952 
 7953 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
 7954 %{
        // Computes dst = (index << scale) + disp for longs with a single leaq.
        // Three-address form: dst is a separate operand from the inputs, and
        // no flags operand or KILL is declared. Only selectable when
        // VM_Version::supports_fast_2op_lea() is true.
 7955   predicate(VM_Version::supports_fast_2op_lea());
 7956   match(Set dst (AddL (LShiftL index scale) disp));
 7957 
 7958   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
 7959   ins_encode %{
          // The shift count is reused directly as the address scale factor;
          // noreg means the address has no base register.
 7960     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7961     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7962   %}
 7963   ins_pipe(ialu_reg_reg);
 7964 %}
 7965 
 7966 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
 7967 %{
        // Computes dst = base + index + disp for longs with a single leaq
        // (index scaled by 1). dst is a separate operand from the inputs and
        // no flags KILL is declared. Only selectable when
        // VM_Version::supports_fast_3op_lea() is true.
 7968   predicate(VM_Version::supports_fast_3op_lea());
 7969   match(Set dst (AddL (AddL base index) disp));
 7970 
 7971   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
 7972   ins_encode %{
 7973     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7974   %}
 7975   ins_pipe(ialu_reg_reg);
 7976 %}
 7977 
 7978 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
 7979 %{
        // Computes dst = base + (index << scale) for longs with a single leaq
        // and no displacement. Only selectable when
        // VM_Version::supports_fast_2op_lea() is true.
        // NOTE(review): base uses the no_rbp_r13_RegL operand class --
        // presumably because rbp/r13 as a base cannot be encoded without a
        // displacement byte; confirm against the operand definition.
 7980   predicate(VM_Version::supports_fast_2op_lea());
 7981   match(Set dst (AddL base (LShiftL index scale)));
 7982 
 7983   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
 7984   ins_encode %{
          // The shift count is reused directly as the address scale factor.
 7985     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7986     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
 7987   %}
 7988   ins_pipe(ialu_reg_reg);
 7989 %}
 7990 
 7991 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
 7992 %{
        // Computes dst = base + (index << scale) + disp for longs with a
        // single leaq using all address components. Only selectable when
        // VM_Version::supports_fast_3op_lea() is true. No flags KILL declared.
 7993   predicate(VM_Version::supports_fast_3op_lea());
 7994   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
 7995 
 7996   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
 7997   ins_encode %{
          // The shift count is reused directly as the address scale factor.
 7998     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7999     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 8000   %}
 8001   ins_pipe(ialu_reg_reg);
 8002 %}
 8003 
 8004 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
 8005 %{
        // Pointer add: pointer register += long register offset (AddP).
        // dst is both the base input and the result; emitted as a 64-bit
        // addq. The flags register is declared clobbered.
 8006   match(Set dst (AddP dst src));
 8007   effect(KILL cr);
 8008 
 8009   format %{ "addq    $dst, $src\t# ptr" %}
 8010   ins_encode %{
 8011     __ addq($dst$$Register, $src$$Register);
 8012   %}
 8013   ins_pipe(ialu_reg_reg);
 8014 %}
 8015 
 8016 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
 8017 %{
        // Pointer add: pointer register += immediate offset (immL32 operand
        // class). dst is both the base input and the result; emitted as a
        // 64-bit addq. The flags register is declared clobbered.
 8018   match(Set dst (AddP dst src));
 8019   effect(KILL cr);
 8020 
 8021   format %{ "addq    $dst, $src\t# ptr" %}
 8022   ins_encode %{
 8023     __ addq($dst$$Register, $src$$constant);
 8024   %}
 8025   ins_pipe( ialu_reg );
 8026 %}
 8027 
 8028 // XXX addP mem ops ????
 8029 
 8030 instruct checkCastPP(rRegP dst)
 8031 %{
        // Matches a CheckCastPP node on a pointer register. dst is both input
        // and result; size(0) and the empty encoding mean no machine code is
        // emitted for this node.
 8032   match(Set dst (CheckCastPP dst));
 8033 
 8034   size(0);
 8035   format %{ "# checkcastPP of $dst" %}
 8036   ins_encode(/* empty encoding */);
 8037   ins_pipe(empty);
 8038 %}
 8039 
 8040 instruct castPP(rRegP dst)
 8041 %{
        // Matches a CastPP node on a pointer register. dst is both input and
        // result; size(0) and the empty encoding mean no machine code is
        // emitted for this node.
 8042   match(Set dst (CastPP dst));
 8043 
 8044   size(0);
 8045   format %{ "# castPP of $dst" %}
 8046   ins_encode(/* empty encoding */);
 8047   ins_pipe(empty);
 8048 %}
 8049 
 8050 instruct castII(rRegI dst)
 8051 %{
        // Matches a CastII node on an int register. dst is both input and
        // result; size(0), the empty encoding and ins_cost(0) make this a
        // zero-size, zero-cost rule that emits no machine code.
 8052   match(Set dst (CastII dst));
 8053 
 8054   size(0);
 8055   format %{ "# castII of $dst" %}
 8056   ins_encode(/* empty encoding */);
 8057   ins_cost(0);
 8058   ins_pipe(empty);
 8059 %}
 8060 
 8061 instruct castLL(rRegL dst)
 8062 %{
        // Matches a CastLL node on a long register. dst is both input and
        // result; size(0), the empty encoding and ins_cost(0) make this a
        // zero-size, zero-cost rule that emits no machine code.
 8063   match(Set dst (CastLL dst));
 8064 
 8065   size(0);
 8066   format %{ "# castLL of $dst" %}
 8067   ins_encode(/* empty encoding */);
 8068   ins_cost(0);
 8069   ins_pipe(empty);
 8070 %}
 8071 
 8072 instruct castFF(regF dst)
 8073 %{
        // Matches a CastFF node on a float register. dst is both input and
        // result; size(0), the empty encoding and ins_cost(0) make this a
        // zero-size, zero-cost rule that emits no machine code.
 8074   match(Set dst (CastFF dst));
 8075 
 8076   size(0);
 8077   format %{ "# castFF of $dst" %}
 8078   ins_encode(/* empty encoding */);
 8079   ins_cost(0);
 8080   ins_pipe(empty);
 8081 %}
 8082 
 8083 instruct castDD(regD dst)
 8084 %{
        // Matches a CastDD node on a double register. dst is both input and
        // result; size(0), the empty encoding and ins_cost(0) make this a
        // zero-size, zero-cost rule that emits no machine code.
 8085   match(Set dst (CastDD dst));
 8086 
 8087   size(0);
 8088   format %{ "# castDD of $dst" %}
 8089   ins_encode(/* empty encoding */);
 8090   ins_cost(0);
 8091   ins_pipe(empty);
 8092 %}
 8093 
 8094 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 8095 instruct compareAndSwapP(rRegI res,
 8096                          memory mem_ptr,
 8097                          rax_RegP oldval, rRegP newval,
 8098                          rFlagsReg cr)
 8099 %{
        // Pointer compare-and-swap producing a boolean in res.
        // Serves both the strong (CompareAndSwapP) and weak
        // (WeakCompareAndSwapP) nodes with the same lock-prefixed cmpxchgq.
        // oldval is pinned to rax by its operand class and is declared killed,
        // as are the flags. Only selectable when supports_cx8() is true and
        // the node carries no GC barrier data.
 8100   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8101   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 8102   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 8103   effect(KILL cr, KILL oldval);
 8104 
 8105   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8106             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8107             "sete    $res\n\t"
 8108             "movzbl  $res, $res" %}
 8109   ins_encode %{
 8110     __ lock();
 8111     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
          // Turn the "equal" flag outcome into 0/1: set the low byte, then
          // zero-extend it over the rest of res.
 8112     __ setb(Assembler::equal, $res$$Register);
 8113     __ movzbl($res$$Register, $res$$Register);
 8114   %}
 8115   ins_pipe( pipe_cmpxchg );
 8116 %}
 8117 
 8118 instruct compareAndSwapL(rRegI res,
 8119                          memory mem_ptr,
 8120                          rax_RegL oldval, rRegL newval,
 8121                          rFlagsReg cr)
 8122 %{
        // Long compare-and-swap producing a boolean in res.
        // Serves both CompareAndSwapL and WeakCompareAndSwapL with the same
        // lock-prefixed cmpxchgq. oldval is pinned to rax by its operand class
        // and is declared killed, as are the flags. Only selectable when
        // VM_Version::supports_cx8() is true.
 8123   predicate(VM_Version::supports_cx8());
 8124   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 8125   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 8126   effect(KILL cr, KILL oldval);
 8127 
 8128   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8129             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8130             "sete    $res\n\t"
 8131             "movzbl  $res, $res" %}
 8132   ins_encode %{
 8133     __ lock();
 8134     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
          // Turn the "equal" flag outcome into 0/1: set the low byte, then
          // zero-extend it over the rest of res.
 8135     __ setb(Assembler::equal, $res$$Register);
 8136     __ movzbl($res$$Register, $res$$Register);
 8137   %}
 8138   ins_pipe( pipe_cmpxchg );
 8139 %}
 8140 
 8141 instruct compareAndSwapI(rRegI res,
 8142                          memory mem_ptr,
 8143                          rax_RegI oldval, rRegI newval,
 8144                          rFlagsReg cr)
 8145 %{
        // Int compare-and-swap producing a boolean in res.
        // Serves both CompareAndSwapI and WeakCompareAndSwapI with the same
        // lock-prefixed cmpxchgl. oldval is pinned to rax by its operand class
        // and is declared killed, as are the flags.
 8146   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 8147   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 8148   effect(KILL cr, KILL oldval);
 8149 
 8150   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8151             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8152             "sete    $res\n\t"
 8153             "movzbl  $res, $res" %}
 8154   ins_encode %{
 8155     __ lock();
 8156     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
          // Turn the "equal" flag outcome into 0/1: set the low byte, then
          // zero-extend it over the rest of res.
 8157     __ setb(Assembler::equal, $res$$Register);
 8158     __ movzbl($res$$Register, $res$$Register);
 8159   %}
 8160   ins_pipe( pipe_cmpxchg );
 8161 %}
 8162 
 8163 instruct compareAndSwapB(rRegI res,
 8164                          memory mem_ptr,
 8165                          rax_RegI oldval, rRegI newval,
 8166                          rFlagsReg cr)
 8167 %{
        // Byte compare-and-swap producing a boolean in res.
        // Serves both CompareAndSwapB and WeakCompareAndSwapB with the same
        // lock-prefixed cmpxchgb. oldval is pinned to rax by its operand class
        // and is declared killed, as are the flags.
 8168   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 8169   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 8170   effect(KILL cr, KILL oldval);
 8171 
 8172   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8173             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8174             "sete    $res\n\t"
 8175             "movzbl  $res, $res" %}
 8176   ins_encode %{
 8177     __ lock();
 8178     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
          // Turn the "equal" flag outcome into 0/1: set the low byte, then
          // zero-extend it over the rest of res.
 8179     __ setb(Assembler::equal, $res$$Register);
 8180     __ movzbl($res$$Register, $res$$Register);
 8181   %}
 8182   ins_pipe( pipe_cmpxchg );
 8183 %}
 8184 
 8185 instruct compareAndSwapS(rRegI res,
 8186                          memory mem_ptr,
 8187                          rax_RegI oldval, rRegI newval,
 8188                          rFlagsReg cr)
 8189 %{
        // Short (16-bit) compare-and-swap producing a boolean in res.
        // Serves both CompareAndSwapS and WeakCompareAndSwapS with the same
        // lock-prefixed cmpxchgw. oldval is pinned to rax by its operand class
        // and is declared killed, as are the flags.
 8190   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 8191   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 8192   effect(KILL cr, KILL oldval);
 8193 
 8194   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8195             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8196             "sete    $res\n\t"
 8197             "movzbl  $res, $res" %}
 8198   ins_encode %{
 8199     __ lock();
 8200     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
          // Turn the "equal" flag outcome into 0/1: set the low byte, then
          // zero-extend it over the rest of res.
 8201     __ setb(Assembler::equal, $res$$Register);
 8202     __ movzbl($res$$Register, $res$$Register);
 8203   %}
 8204   ins_pipe( pipe_cmpxchg );
 8205 %}
 8206 
 8207 instruct compareAndSwapN(rRegI res,
 8208                           memory mem_ptr,
 8209                           rax_RegN oldval, rRegN newval,
 8210                           rFlagsReg cr) %{
        // Narrow-pointer (rRegN operands) compare-and-swap producing a boolean
        // in res. Serves both CompareAndSwapN and WeakCompareAndSwapN with the
        // same lock-prefixed 32-bit cmpxchgl. oldval is pinned to rax by its
        // operand class and is declared killed, as are the flags.
 8211   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
 8212   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
 8213   effect(KILL cr, KILL oldval);
 8214 
 8215   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8216             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8217             "sete    $res\n\t"
 8218             "movzbl  $res, $res" %}
 8219   ins_encode %{
 8220     __ lock();
 8221     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
          // Turn the "equal" flag outcome into 0/1: set the low byte, then
          // zero-extend it over the rest of res.
 8222     __ setb(Assembler::equal, $res$$Register);
 8223     __ movzbl($res$$Register, $res$$Register);
 8224   %}
 8225   ins_pipe( pipe_cmpxchg );
 8226 %}
 8227 
 8228 instruct compareAndExchangeB(
 8229                          memory mem_ptr,
 8230                          rax_RegI oldval, rRegI newval,
 8231                          rFlagsReg cr)
 8232 %{
        // Byte compare-and-exchange. The result of the node is oldval itself
        // (pinned to rax by its operand class), so there is no separate result
        // operand and no sete/movzbl tail. Only the flags are declared killed.
 8233   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 8234   effect(KILL cr);
 8235 
 8236   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8237             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8238   ins_encode %{
 8239     __ lock();
 8240     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 8241   %}
 8242   ins_pipe( pipe_cmpxchg );
 8243 %}
 8244 
 8245 instruct compareAndExchangeS(
 8246                          memory mem_ptr,
 8247                          rax_RegI oldval, rRegI newval,
 8248                          rFlagsReg cr)
 8249 %{
        // Short (16-bit) compare-and-exchange. The result of the node is
        // oldval itself (pinned to rax by its operand class); only the flags
        // are declared killed.
 8250   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 8251   effect(KILL cr);
 8252 
 8253   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8254             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8255   ins_encode %{
 8256     __ lock();
 8257     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 8258   %}
 8259   ins_pipe( pipe_cmpxchg );
 8260 %}
 8261 
 8262 instruct compareAndExchangeI(
 8263                          memory mem_ptr,
 8264                          rax_RegI oldval, rRegI newval,
 8265                          rFlagsReg cr)
 8266 %{
        // Int compare-and-exchange. The result of the node is oldval itself
        // (pinned to rax by its operand class); only the flags are declared
        // killed.
 8267   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 8268   effect(KILL cr);
 8269 
 8270   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8271             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8272   ins_encode %{
 8273     __ lock();
 8274     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8275   %}
 8276   ins_pipe( pipe_cmpxchg );
 8277 %}
 8278 
 8279 instruct compareAndExchangeL(
 8280                          memory mem_ptr,
 8281                          rax_RegL oldval, rRegL newval,
 8282                          rFlagsReg cr)
 8283 %{
        // Long compare-and-exchange. The result of the node is oldval itself
        // (pinned to rax by its operand class); only the flags are declared
        // killed. Only selectable when VM_Version::supports_cx8() is true.
 8284   predicate(VM_Version::supports_cx8());
 8285   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 8286   effect(KILL cr);
 8287 
 8288   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8289             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8290   ins_encode %{
 8291     __ lock();
 8292     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8293   %}
 8294   ins_pipe( pipe_cmpxchg );
 8295 %}
 8296 
 8297 instruct compareAndExchangeN(
 8298                           memory mem_ptr,
 8299                           rax_RegN oldval, rRegN newval,
 8300                           rFlagsReg cr) %{
        // Narrow-pointer (rRegN operands) compare-and-exchange using a 32-bit
        // cmpxchgl. The result of the node is oldval itself (pinned to rax by
        // its operand class); only the flags are declared killed.
 8301   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
 8302   effect(KILL cr);
 8303 
 8304   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8305             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8306   ins_encode %{
 8307     __ lock();
 8308     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8309   %}
 8310   ins_pipe( pipe_cmpxchg );
 8311 %}
 8312 
 8313 instruct compareAndExchangeP(
 8314                          memory mem_ptr,
 8315                          rax_RegP oldval, rRegP newval,
 8316                          rFlagsReg cr)
 8317 %{
        // Pointer compare-and-exchange. The result of the node is oldval
        // itself (pinned to rax by its operand class); only the flags are
        // declared killed. Only selectable when supports_cx8() is true and
        // the node carries no GC barrier data.
 8318   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8319   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 8320   effect(KILL cr);
 8321 
 8322   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8323             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8324   ins_encode %{
 8325     __ lock();
 8326     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8327   %}
 8328   ins_pipe( pipe_cmpxchg );
 8329 %}
 8330 
 8331 instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
        // Byte get-and-add whose fetched value is not consumed (predicate:
        // result_not_used()). Because no result is needed, a lock-prefixed
        // addb with an immediate is emitted instead of xadd; "dummy" only
        // stands in for the unused result of the Set. Flags are clobbered.
 8332   predicate(n->as_LoadStore()->result_not_used());
 8333   match(Set dummy (GetAndAddB mem add));
 8334   effect(KILL cr);
 8335   format %{ "ADDB  [$mem],$add" %}
 8336   ins_encode %{
 8337     __ lock();
 8338     __ addb($mem$$Address, $add$$constant);
 8339   %}
 8340   ins_pipe( pipe_cmpxchg );
 8341 %}
 8342 
 8343 instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{
        // Byte get-and-add via lock-prefixed xaddb. newval is the addend on
        // input and is also the operand that receives the node's result.
        // Flags are declared clobbered.
 8344   match(Set newval (GetAndAddB mem newval));
 8345   effect(KILL cr);
 8346   format %{ "XADDB  [$mem],$newval" %}
 8347   ins_encode %{
 8348     __ lock();
 8349     __ xaddb($mem$$Address, $newval$$Register);
 8350   %}
 8351   ins_pipe( pipe_cmpxchg );
 8352 %}
 8353 
 8354 instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
        // Short (16-bit) get-and-add whose fetched value is not consumed
        // (predicate: result_not_used()). Emits a lock-prefixed addw with an
        // immediate instead of xadd; "dummy" only stands in for the unused
        // result of the Set. Flags are clobbered.
 8355   predicate(n->as_LoadStore()->result_not_used());
 8356   match(Set dummy (GetAndAddS mem add));
 8357   effect(KILL cr);
 8358   format %{ "ADDW  [$mem],$add" %}
 8359   ins_encode %{
 8360     __ lock();
 8361     __ addw($mem$$Address, $add$$constant);
 8362   %}
 8363   ins_pipe( pipe_cmpxchg );
 8364 %}
 8365 
 8366 instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{
        // Short (16-bit) get-and-add via lock-prefixed xaddw. newval is the
        // addend on input and is also the operand that receives the node's
        // result. Flags are declared clobbered.
 8367   match(Set newval (GetAndAddS mem newval));
 8368   effect(KILL cr);
 8369   format %{ "XADDW  [$mem],$newval" %}
 8370   ins_encode %{
 8371     __ lock();
 8372     __ xaddw($mem$$Address, $newval$$Register);
 8373   %}
 8374   ins_pipe( pipe_cmpxchg );
 8375 %}
 8376 
 8377 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
        // Int get-and-add whose fetched value is not consumed (predicate:
        // result_not_used()). Emits a lock-prefixed addl with an immediate
        // instead of xadd; "dummy" only stands in for the unused result of
        // the Set. Flags are clobbered.
 8378   predicate(n->as_LoadStore()->result_not_used());
 8379   match(Set dummy (GetAndAddI mem add));
 8380   effect(KILL cr);
 8381   format %{ "ADDL  [$mem],$add" %}
 8382   ins_encode %{
 8383     __ lock();
 8384     __ addl($mem$$Address, $add$$constant);
 8385   %}
 8386   ins_pipe( pipe_cmpxchg );
 8387 %}
 8388 
 8389 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
        // Int get-and-add via lock-prefixed xaddl. newval is the addend on
        // input and is also the operand that receives the node's result.
        // Flags are declared clobbered.
 8390   match(Set newval (GetAndAddI mem newval));
 8391   effect(KILL cr);
 8392   format %{ "XADDL  [$mem],$newval" %}
 8393   ins_encode %{
 8394     __ lock();
 8395     __ xaddl($mem$$Address, $newval$$Register);
 8396   %}
 8397   ins_pipe( pipe_cmpxchg );
 8398 %}
 8399 
 8400 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
        // Long get-and-add whose fetched value is not consumed (predicate:
        // result_not_used()). Emits a lock-prefixed addq with an immediate
        // (immL32 operand class) instead of xadd; "dummy" only stands in for
        // the unused result of the Set. Flags are clobbered.
 8401   predicate(n->as_LoadStore()->result_not_used());
 8402   match(Set dummy (GetAndAddL mem add));
 8403   effect(KILL cr);
 8404   format %{ "ADDQ  [$mem],$add" %}
 8405   ins_encode %{
 8406     __ lock();
 8407     __ addq($mem$$Address, $add$$constant);
 8408   %}
 8409   ins_pipe( pipe_cmpxchg );
 8410 %}
 8411 
 8412 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
        // Long get-and-add via lock-prefixed xaddq. newval is the addend on
        // input and is also the operand that receives the node's result.
        // Flags are declared clobbered.
 8413   match(Set newval (GetAndAddL mem newval));
 8414   effect(KILL cr);
 8415   format %{ "XADDQ  [$mem],$newval" %}
 8416   ins_encode %{
 8417     __ lock();
 8418     __ xaddq($mem$$Address, $newval$$Register);
 8419   %}
 8420   ins_pipe( pipe_cmpxchg );
 8421 %}
 8422 
 8423 instruct xchgB( memory mem, rRegI newval) %{
        // Byte get-and-set via xchgb: newval supplies the value to store and
        // is also the operand that receives the node's result.
        // Unlike the xadd rules, no lock() and no flags KILL are declared;
        // x86 xchg with a memory operand is architecturally locked and does
        // not modify flags (see the Intel SDM entry for XCHG).
 8424   match(Set newval (GetAndSetB mem newval));
 8425   format %{ "XCHGB  $newval,[$mem]" %}
 8426   ins_encode %{
 8427     __ xchgb($newval$$Register, $mem$$Address);
 8428   %}
 8429   ins_pipe( pipe_cmpxchg );
 8430 %}
 8431 
 8432 instruct xchgS( memory mem, rRegI newval) %{
        // Short (16-bit) get-and-set via xchgw: newval supplies the value to
        // store and is also the operand that receives the node's result.
        // No lock() and no flags KILL are declared; x86 xchg with a memory
        // operand is architecturally locked and does not modify flags.
 8433   match(Set newval (GetAndSetS mem newval));
 8434   format %{ "XCHGW  $newval,[$mem]" %}
 8435   ins_encode %{
 8436     __ xchgw($newval$$Register, $mem$$Address);
 8437   %}
 8438   ins_pipe( pipe_cmpxchg );
 8439 %}
 8440 
 8441 instruct xchgI( memory mem, rRegI newval) %{
        // Int get-and-set via xchgl: newval supplies the value to store and
        // is also the operand that receives the node's result.
        // No lock() and no flags KILL are declared; x86 xchg with a memory
        // operand is architecturally locked and does not modify flags.
 8442   match(Set newval (GetAndSetI mem newval));
 8443   format %{ "XCHGL  $newval,[$mem]" %}
 8444   ins_encode %{
 8445     __ xchgl($newval$$Register, $mem$$Address);
 8446   %}
 8447   ins_pipe( pipe_cmpxchg );
 8448 %}
 8449 
 8450 instruct xchgL( memory mem, rRegL newval) %{
        // Long get-and-set via 64-bit xchgq: newval supplies the value to
        // store and is also the operand that receives the node's result.
        // No lock() and no flags KILL are declared; x86 xchg with a memory
        // operand is architecturally locked and does not modify flags.
        // The format string names XCHGQ so the printed mnemonic matches the
        // xchgq instruction actually emitted below.
 8451   match(Set newval (GetAndSetL mem newval));
 8452   format %{ "XCHGQ  $newval,[$mem]" %}
 8453   ins_encode %{
 8454     __ xchgq($newval$$Register, $mem$$Address);
 8455   %}
 8456   ins_pipe( pipe_cmpxchg );
 8457 %}
 8458 
 8459 instruct xchgP( memory mem, rRegP newval) %{
        // Pointer get-and-set via 64-bit xchgq: newval supplies the value to
        // store and is also the operand that receives the node's result.
        // Only selectable when the node carries no GC barrier data.
        // No lock() and no flags KILL are declared; x86 xchg with a memory
        // operand is architecturally locked and does not modify flags.
 8460   match(Set newval (GetAndSetP mem newval));
 8461   predicate(n->as_LoadStore()->barrier_data() == 0);
 8462   format %{ "XCHGQ  $newval,[$mem]" %}
 8463   ins_encode %{
 8464     __ xchgq($newval$$Register, $mem$$Address);
 8465   %}
 8466   ins_pipe( pipe_cmpxchg );
 8467 %}
 8468 
 8469 instruct xchgN( memory mem, rRegN newval) %{
        // Narrow-pointer (rRegN operand) get-and-set via 32-bit xchgl: newval
        // supplies the value to store and is also the operand that receives
        // the node's result.
        // No lock() and no flags KILL are declared; x86 xchg with a memory
        // operand is architecturally locked and does not modify flags.
 8470   match(Set newval (GetAndSetN mem newval));
 8471   format %{ "XCHGL  $newval,[$mem]" %}
 8472   ins_encode %{
 8473     __ xchgl($newval$$Register, $mem$$Address);
 8474   %}
 8475   ins_pipe( pipe_cmpxchg );
 8476 %}
 8477 
 8478 //----------Abs Instructions-------------------------------------------
 8479 
 8480 // Integer Absolute Instructions
 8481 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8482 %{
        // Branch-free 32-bit absolute value: dst = |src|.
        // TEMP dst: dst is written (zeroed) before src is read, so it is
        // declared as a temp in addition to being the result; the flags are
        // clobbered by the xor/sub.
 8483   match(Set dst (AbsI src));
 8484   effect(TEMP dst, KILL cr);
 8485   format %{ "xorl    $dst, $dst\t# abs int\n\t"
 8486             "subl    $dst, $src\n\t"
 8487             "cmovll  $dst, $src" %}
 8488   ins_encode %{
          // dst = 0 - src, i.e. -src; the subtraction also sets the flags.
 8489     __ xorl($dst$$Register, $dst$$Register);
 8490     __ subl($dst$$Register, $src$$Register);
          // If the negated value compared "less" (negative), src was the
          // positive one, so take src instead.
 8491     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
 8492   %}
 8493 
 8494   ins_pipe(ialu_reg_reg);
 8495 %}
 8496 
 8497 // Long Absolute Instructions
 8498 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8499 %{
        // Branch-free 64-bit absolute value: dst = |src|.
        // TEMP dst: dst is written (zeroed) before src is read, so it is
        // declared as a temp in addition to being the result; the flags are
        // clobbered by the xor/sub.
 8500   match(Set dst (AbsL src));
 8501   effect(TEMP dst, KILL cr);
 8502   format %{ "xorl    $dst, $dst\t# abs long\n\t"
 8503             "subq    $dst, $src\n\t"
 8504             "cmovlq  $dst, $src" %}
 8505   ins_encode %{
          // A 32-bit xorl is used to zero dst; on x86-64 a 32-bit register
          // write zero-extends to the full 64-bit register.
          // dst = 0 - src, i.e. -src; the subtraction also sets the flags.
 8506     __ xorl($dst$$Register, $dst$$Register);
 8507     __ subq($dst$$Register, $src$$Register);
          // If the negated value compared "less" (negative), src was the
          // positive one, so take src instead.
 8508     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
 8509   %}
 8510 
 8511   ins_pipe(ialu_reg_reg);
 8512 %}
 8513 
 8514 //----------Subtraction Instructions-------------------------------------------
 8515 
 8516 // Integer Subtraction Instructions
 8517 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8518 %{
        // 32-bit integer subtract, register -= register.
        // Two-address form: dst is the left input of SubI and also the result.
        // The flags register is declared clobbered via KILL cr.
 8519   match(Set dst (SubI dst src));
 8520   effect(KILL cr);
 8521 
 8522   format %{ "subl    $dst, $src\t# int" %}
 8523   ins_encode %{
 8524     __ subl($dst$$Register, $src$$Register);
 8525   %}
 8526   ins_pipe(ialu_reg_reg);
 8527 %}
 8528 
 8529 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 8530 %{
        // 32-bit integer subtract, register -= value loaded from memory.
        // Folds the LoadI into the sub's memory operand; flags are clobbered.
 8531   match(Set dst (SubI dst (LoadI src)));
 8532   effect(KILL cr);
 8533 
 8534   ins_cost(150);
 8535   format %{ "subl    $dst, $src\t# int" %}
 8536   ins_encode %{
 8537     __ subl($dst$$Register, $src$$Address);
 8538   %}
 8539   ins_pipe(ialu_reg_mem);
 8540 %}
 8541 
 8542 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 8543 %{
        // 32-bit read-modify-write subtract: memory -= register.
        // Matches a StoreI of (LoadI dst - src) back to the same dst operand
        // and emits a single sub with a memory destination. Flags clobbered.
 8544   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 8545   effect(KILL cr);
 8546 
 8547   ins_cost(150);
 8548   format %{ "subl    $dst, $src\t# int" %}
 8549   ins_encode %{
 8550     __ subl($dst$$Address, $src$$Register);
 8551   %}
 8552   ins_pipe(ialu_mem_reg);
 8553 %}
 8554 
 8555 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8556 %{
        // 64-bit long subtract, register -= register.
        // Two-address form: dst is the left input of SubL and also the result.
        // The flags register is declared clobbered via KILL cr.
 8557   match(Set dst (SubL dst src));
 8558   effect(KILL cr);
 8559 
 8560   format %{ "subq    $dst, $src\t# long" %}
 8561   ins_encode %{
 8562     __ subq($dst$$Register, $src$$Register);
 8563   %}
 8564   ins_pipe(ialu_reg_reg);
 8565 %}
 8566 
 8567 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 8568 %{
        // 64-bit long subtract, register -= value loaded from memory.
        // Folds the LoadL into the sub's memory operand; flags are clobbered.
 8569   match(Set dst (SubL dst (LoadL src)));
 8570   effect(KILL cr);
 8571 
 8572   ins_cost(150);
 8573   format %{ "subq    $dst, $src\t# long" %}
 8574   ins_encode %{
 8575     __ subq($dst$$Register, $src$$Address);
 8576   %}
 8577   ins_pipe(ialu_reg_mem);
 8578 %}
 8579 
 8580 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 8581 %{
        // Long read-modify-write subtract: [dst] = [dst] - src.
        // The load, subtract and store of the same address are matched as
        // one subq with a memory destination.
 8582   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
 8583   effect(KILL cr);
 8584
 8585   ins_cost(150);
 8586   format %{ "subq    $dst, $src\t# long" %}
 8587   ins_encode %{
 8588     __ subq($dst$$Address, $src$$Register);
 8589   %}
 8590   ins_pipe(ialu_mem_reg);
 8591 %}
 8592 
 8593 // Subtract from a pointer
 8594 // XXX hmpf???
 8595 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
 8596 %{
        // Pointer minus int: matches AddP of dst with (0 - src) and, per the
        // format text, emits a subq instead of a negate followed by an add.
        // Unlike its neighbours this uses the opcode()/enc_class style;
        // REX_reg_reg_wide, OpcP and reg_reg are defined elsewhere in the file.
        // NOTE(review): src is an rRegI but the format shows a 64-bit subq --
        // confirm what the upper half of src is expected to hold.
 8597   match(Set dst (AddP dst (SubI zero src)));
 8598   effect(KILL cr);
 8599
 8600   format %{ "subq    $dst, $src\t# ptr - int" %}
 8601   opcode(0x2B);
 8602   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
 8603   ins_pipe(ialu_reg_reg);
 8604 %}
 8605 
 8606 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
 8607 %{
        // Int negate expressed as a subtraction from zero: dst = 0 - dst.
        // The zero operand only shapes the match; it is not used in the encoding.
 8608   match(Set dst (SubI zero dst));
 8609   effect(KILL cr);
 8610
 8611   format %{ "negl    $dst\t# int" %}
 8612   ins_encode %{
 8613     __ negl($dst$$Register);
 8614   %}
 8615   ins_pipe(ialu_reg);
 8616 %}
 8617 
 8618 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
 8619 %{
        // Int negate for an explicit NegI node: dst = -dst.
        // Emits the same negl as negI_rReg, which matches the (0 - dst) shape.
 8620   match(Set dst (NegI dst));
 8621   effect(KILL cr);
 8622
 8623   format %{ "negl    $dst\t# int" %}
 8624   ins_encode %{
 8625     __ negl($dst$$Register);
 8626   %}
 8627   ins_pipe(ialu_reg);
 8628 %}
 8629 
 8630 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
 8631 %{
        // Int negate in memory: [dst] = 0 - [dst], matched as one negl on the
        // memory operand instead of a separate load, negate and store.
        // NOTE(review): ins_pipe is ialu_reg although the operand is memory;
        // other read-modify-write forms here use a *_mem_* pipe class -- confirm.
 8632   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
 8633   effect(KILL cr);
 8634
 8635   format %{ "negl    $dst\t# int" %}
 8636   ins_encode %{
 8637     __ negl($dst$$Address);
 8638   %}
 8639   ins_pipe(ialu_reg);
 8640 %}
 8641 
 8642 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
 8643 %{
        // Long negate expressed as a subtraction from zero: dst = 0 - dst.
        // The zero operand only shapes the match; it is not used in the encoding.
 8644   match(Set dst (SubL zero dst));
 8645   effect(KILL cr);
 8646
 8647   format %{ "negq    $dst\t# long" %}
 8648   ins_encode %{
 8649     __ negq($dst$$Register);
 8650   %}
 8651   ins_pipe(ialu_reg);
 8652 %}
 8653 
 8654 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
 8655 %{
        // Long negate for an explicit NegL node: dst = -dst.
        // Emits the same negq as negL_rReg, which matches the (0 - dst) shape.
 8656   match(Set dst (NegL dst));
 8657   effect(KILL cr);
 8658
 8659   format %{ "negq    $dst\t# long" %}
 8660   ins_encode %{
 8661     __ negq($dst$$Register);
 8662   %}
 8663   ins_pipe(ialu_reg);
 8664 %}
 8665 
 8666 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
 8667 %{
        // Long negate in memory: [dst] = 0 - [dst], matched as one negq on the
        // memory operand instead of a separate load, negate and store.
        // NOTE(review): ins_pipe is ialu_reg although the operand is memory;
        // other read-modify-write forms here use a *_mem_* pipe class -- confirm.
 8668   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
 8669   effect(KILL cr);
 8670
 8671   format %{ "negq    $dst\t# long" %}
 8672   ins_encode %{
 8673     __ negq($dst$$Address);
 8674   %}
 8675   ins_pipe(ialu_reg);
 8676 %}
 8677 
 8678 //----------Multiplication/Division Instructions-------------------------------
 8679 // Integer Multiplication Instructions
 8680 // Multiply Register
 8681 
 8682 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8683 %{
        // Int multiply, register * register: dst = dst * src (two-operand imull).
        // cr is declared killed.
 8684   match(Set dst (MulI dst src));
 8685   effect(KILL cr);
 8686
 8687   ins_cost(300);
 8688   format %{ "imull   $dst, $src\t# int" %}
 8689   ins_encode %{
 8690     __ imull($dst$$Register, $src$$Register);
 8691   %}
 8692   ins_pipe(ialu_reg_reg_alu0);
 8693 %}
 8694 
 8695 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
 8696 %{
        // Int multiply by a constant: dst = src * imm (three-operand imull).
        // dst is not an input of the match, so it need not be the same
        // register as src.
 8697   match(Set dst (MulI src imm));
 8698   effect(KILL cr);
 8699
 8700   ins_cost(300);
 8701   format %{ "imull   $dst, $src, $imm\t# int" %}
 8702   ins_encode %{
 8703     __ imull($dst$$Register, $src$$Register, $imm$$constant);
 8704   %}
 8705   ins_pipe(ialu_reg_reg_alu0);
 8706 %}
 8707 
 8708 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
 8709 %{
        // Int multiply with a memory source: dst = dst * [src].
        // The LoadI is folded into imull's memory operand.
 8710   match(Set dst (MulI dst (LoadI src)));
 8711   effect(KILL cr);
 8712
 8713   ins_cost(350);
 8714   format %{ "imull   $dst, $src\t# int" %}
 8715   ins_encode %{
 8716     __ imull($dst$$Register, $src$$Address);
 8717   %}
 8718   ins_pipe(ialu_reg_mem_alu0);
 8719 %}
 8720 
 8721 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
 8722 %{
        // Int multiply of a memory value by a constant: dst = [src] * imm
        // (three-operand imull with a memory source).
 8723   match(Set dst (MulI (LoadI src) imm));
 8724   effect(KILL cr);
 8725
 8726   ins_cost(300);
 8727   format %{ "imull   $dst, $src, $imm\t# int" %}
 8728   ins_encode %{
 8729     __ imull($dst$$Register, $src$$Address, $imm$$constant);
 8730   %}
 8731   ins_pipe(ialu_reg_mem_alu0);
 8732 %}
 8733 
 8734 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
 8735 %{
        // MulAddS2I: dst = dst * src1 + src2 * src3.
        // There is no encoding of its own; it expands into three instructs:
        //   dst *= src1;  src2 *= src3;  dst += src2.
        // src2 is declared killed because the second multiply overwrites it.
        // addI_rReg is defined elsewhere in this file.
 8736   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 8737   effect(KILL cr, KILL src2);
 8738
 8739   expand %{ mulI_rReg(dst, src1, cr);
 8740            mulI_rReg(src2, src3, cr);
 8741            addI_rReg(dst, src2, cr); %}
 8742 %}
 8743 
 8744 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8745 %{
        // Long multiply, register * register: dst = dst * src (two-operand imulq).
        // cr is declared killed.
 8746   match(Set dst (MulL dst src));
 8747   effect(KILL cr);
 8748
 8749   ins_cost(300);
 8750   format %{ "imulq   $dst, $src\t# long" %}
 8751   ins_encode %{
 8752     __ imulq($dst$$Register, $src$$Register);
 8753   %}
 8754   ins_pipe(ialu_reg_reg_alu0);
 8755 %}
 8756 
 8757 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
 8758 %{
        // Long multiply by a constant: dst = src * imm (three-operand imulq).
        // The constant operand is restricted to immL32.
 8759   match(Set dst (MulL src imm));
 8760   effect(KILL cr);
 8761
 8762   ins_cost(300);
 8763   format %{ "imulq   $dst, $src, $imm\t# long" %}
 8764   ins_encode %{
 8765     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
 8766   %}
 8767   ins_pipe(ialu_reg_reg_alu0);
 8768 %}
 8769 
 8770 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
 8771 %{
        // Long multiply with a memory source: dst = dst * [src].
        // The LoadL is folded into imulq's memory operand.
 8772   match(Set dst (MulL dst (LoadL src)));
 8773   effect(KILL cr);
 8774
 8775   ins_cost(350);
 8776   format %{ "imulq   $dst, $src\t# long" %}
 8777   ins_encode %{
 8778     __ imulq($dst$$Register, $src$$Address);
 8779   %}
 8780   ins_pipe(ialu_reg_mem_alu0);
 8781 %}
 8782 
 8783 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
 8784 %{
        // Long multiply of a memory value by a constant: dst = [src] * imm
        // (three-operand imulq with a memory source; constant limited to immL32).
 8785   match(Set dst (MulL (LoadL src) imm));
 8786   effect(KILL cr);
 8787
 8788   ins_cost(300);
 8789   format %{ "imulq   $dst, $src, $imm\t# long" %}
 8790   ins_encode %{
 8791     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
 8792   %}
 8793   ins_pipe(ialu_reg_mem_alu0);
 8794 %}
 8795 
 8796 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 8797 %{
        // High half of a signed long multiply (MulHiL).
        // Uses the one-operand imulq: per the format text, RDX:RAX = RAX * src.
        // The result operand is pinned to RDX (rdx_RegL); rax is an input that
        // the instruction also overwrites, hence USE_KILL.
 8798   match(Set dst (MulHiL src rax));
 8799   effect(USE_KILL rax, KILL cr);
 8800
 8801   ins_cost(300);
 8802   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
 8803   ins_encode %{
 8804     __ imulq($src$$Register);
 8805   %}
 8806   ins_pipe(ialu_reg_reg_alu0);
 8807 %}
 8808 
 8809 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 8810 %{
        // High half of an unsigned long multiply (UMulHiL).
        // Uses the one-operand mulq: per the format text, RDX:RAX = RAX * src.
        // The result operand is pinned to RDX (rdx_RegL); rax is an input that
        // the instruction also overwrites, hence USE_KILL.
 8811   match(Set dst (UMulHiL src rax));
 8812   effect(USE_KILL rax, KILL cr);
 8813
 8814   ins_cost(300);
 8815   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
 8816   ins_encode %{
 8817     __ mulq($src$$Register);
 8818   %}
 8819   ins_pipe(ialu_reg_reg_alu0);
 8820 %}
 8821 
 8822 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
 8823                    rFlagsReg cr)
 8824 %{
        // Signed int division: rax = rax / div. rdx and cr are killed.
        // The divisor is restricted to registers other than rax/rdx.
        // The encoding lives in cdql_enc (defined elsewhere). Per the format
        // text below, when rax == 0x80000000 and div == -1 the idivl is skipped,
        // leaving rax unchanged and rdx zeroed; that is the one input pair for
        // which idivl itself would overflow.
 8825   match(Set rax (DivI rax div));
 8826   effect(KILL rdx, KILL cr);
 8827
 8828   ins_cost(30*100+10*100); // XXX
 8829   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
 8830             "jne,s   normal\n\t"
 8831             "xorl    rdx, rdx\n\t"
 8832             "cmpl    $div, -1\n\t"
 8833             "je,s    done\n"
 8834     "normal: cdql\n\t"
 8835             "idivl   $div\n"
 8836     "done:"        %}
 8837   ins_encode(cdql_enc(div));
 8838   ins_pipe(ialu_reg_reg_alu0);
 8839 %}
 8840 
 8841 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
 8842                    rFlagsReg cr)
 8843 %{
        // Signed long division: rax = rax / div. rdx and cr are killed.
        // The divisor is restricted to registers other than rax/rdx.
        // The encoding lives in cdqq_enc (defined elsewhere). Per the format
        // text below, when rax == 0x8000000000000000 and div == -1 the idivq is
        // skipped, leaving rax unchanged and rdx zeroed; that is the one input
        // pair for which idivq itself would overflow.
 8844   match(Set rax (DivL rax div));
 8845   effect(KILL rdx, KILL cr);
 8846
 8847   ins_cost(30*100+10*100); // XXX
 8848   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
 8849             "cmpq    rax, rdx\n\t"
 8850             "jne,s   normal\n\t"
 8851             "xorl    rdx, rdx\n\t"
 8852             "cmpq    $div, -1\n\t"
 8853             "je,s    done\n"
 8854     "normal: cdqq\n\t"
 8855             "idivq   $div\n"
 8856     "done:"        %}
 8857   ins_encode(cdqq_enc(div));
 8858   ins_pipe(ialu_reg_reg_alu0);
 8859 %}
 8860 
 8861 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
 8862 %{
        // Unsigned int division: rax = rax / div (UDivI). rdx and cr are killed.
        // Code generation is delegated to the udivI macro-assembler helper
        // (defined elsewhere), which receives rax, div and rdx.
 8863   match(Set rax (UDivI rax div));
 8864   effect(KILL rdx, KILL cr);
 8865
 8866   ins_cost(300);
 8867   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
 8868   ins_encode %{
 8869     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
 8870   %}
 8871   ins_pipe(ialu_reg_reg_alu0);
 8872 %}
 8873 
 8874 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
 8875 %{
        // Unsigned long division: rax = rax / div (UDivL). rdx and cr are killed.
        // Code generation is delegated to the udivL macro-assembler helper
        // (defined elsewhere), which receives rax, div and rdx.
 8876   match(Set rax (UDivL rax div));
 8877   effect(KILL rdx, KILL cr);
 8878
 8879   ins_cost(300);
 8880   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
 8881   ins_encode %{
 8882      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
 8883   %}
 8884   ins_pipe(ialu_reg_reg_alu0);
 8885 %}
 8886 
 8887 // Integer DIVMOD with Register, both quotient and mod results
 8888 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
 8889                              rFlagsReg cr)
 8890 %{
        // Combined signed int divide and remainder (DivModI).
        // The match has no Set: both rax and rdx stay live afterwards, so only
        // cr is killed (the x86 idiv convention is quotient in rax, remainder
        // in rdx).
        // Same cdql_enc sequence as divI_rReg, including the
        // 0x80000000 / -1 special case shown in the format text.
 8891   match(DivModI rax div);
 8892   effect(KILL cr);
 8893
 8894   ins_cost(30*100+10*100); // XXX
 8895   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
 8896             "jne,s   normal\n\t"
 8897             "xorl    rdx, rdx\n\t"
 8898             "cmpl    $div, -1\n\t"
 8899             "je,s    done\n"
 8900     "normal: cdql\n\t"
 8901             "idivl   $div\n"
 8902     "done:"        %}
 8903   ins_encode(cdql_enc(div));
 8904   ins_pipe(pipe_slow);
 8905 %}
 8906 
 8907 // Long DIVMOD with Register, both quotient and mod results
 8908 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
 8909                              rFlagsReg cr)
 8910 %{
        // Combined signed long divide and remainder (DivModL).
        // The match has no Set: both rax and rdx stay live afterwards, so only
        // cr is killed (the x86 idiv convention is quotient in rax, remainder
        // in rdx).
        // Same cdqq_enc sequence as divL_rReg, including the
        // 0x8000000000000000 / -1 special case shown in the format text.
 8911   match(DivModL rax div);
 8912   effect(KILL cr);
 8913
 8914   ins_cost(30*100+10*100); // XXX
 8915   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
 8916             "cmpq    rax, rdx\n\t"
 8917             "jne,s   normal\n\t"
 8918             "xorl    rdx, rdx\n\t"
 8919             "cmpq    $div, -1\n\t"
 8920             "je,s    done\n"
 8921     "normal: cdqq\n\t"
 8922             "idivq   $div\n"
 8923     "done:"        %}
 8924   ins_encode(cdqq_enc(div));
 8925   ins_pipe(pipe_slow);
 8926 %}
 8927 
 8928 // Unsigned integer DIVMOD with Register, both quotient and mod results
 8929 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
 8930                               no_rax_rdx_RegI div, rFlagsReg cr)
 8931 %{
        // Combined unsigned int divide and remainder (UDivModI).
        // The match has no Set; rax and rdx both stay live, cr is killed.
        // tmp is a scratch register (TEMP) kept out of rax/rdx. Code generation
        // is delegated to the udivmodI macro-assembler helper (defined elsewhere).
 8932   match(UDivModI rax div);
 8933   effect(TEMP tmp, KILL cr);
 8934
 8935   ins_cost(300);
 8936   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
 8937             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
 8938           %}
 8939   ins_encode %{
 8940     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
 8941   %}
 8942   ins_pipe(pipe_slow);
 8943 %}
 8944 
 8945 // Unsigned long DIVMOD with Register, both quotient and mod results
 8946 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
 8947                               no_rax_rdx_RegL div, rFlagsReg cr)
 8948 %{
        // Combined unsigned long divide and remainder (UDivModL).
        // The match has no Set; rax and rdx both stay live, cr is killed.
        // tmp is a scratch register (TEMP) kept out of rax/rdx. Code generation
        // is delegated to the udivmodL macro-assembler helper (defined elsewhere).
 8949   match(UDivModL rax div);
 8950   effect(TEMP tmp, KILL cr);
 8951
 8952   ins_cost(300);
 8953   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
 8954             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
 8955           %}
 8956   ins_encode %{
 8957     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
 8958   %}
 8959   ins_pipe(pipe_slow);
 8960 %}
 8961 
 8962 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
 8963                    rFlagsReg cr)
 8964 %{
        // Signed int remainder: rdx = rax % div. rax and cr are killed.
        // Shares the cdql_enc sequence with divI_rReg. Per the format text,
        // when rax == 0x80000000 and div == -1 the idivl is skipped and rdx
        // is left at zero.
 8965   match(Set rdx (ModI rax div));
 8966   effect(KILL rax, KILL cr);
 8967
 8968   ins_cost(300); // XXX
 8969   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
 8970             "jne,s   normal\n\t"
 8971             "xorl    rdx, rdx\n\t"
 8972             "cmpl    $div, -1\n\t"
 8973             "je,s    done\n"
 8974     "normal: cdql\n\t"
 8975             "idivl   $div\n"
 8976     "done:"        %}
 8977   ins_encode(cdql_enc(div));
 8978   ins_pipe(ialu_reg_reg_alu0);
 8979 %}
 8980 
 8981 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
 8982                    rFlagsReg cr)
 8983 %{
        // Signed long remainder: rdx = rax % div. rax and cr are killed.
        // Shares the cdqq_enc sequence with divL_rReg. Per the format text,
        // when rax == 0x8000000000000000 and div == -1 the idivq is skipped
        // and rdx is left at zero.
 8984   match(Set rdx (ModL rax div));
 8985   effect(KILL rax, KILL cr);
 8986
 8987   ins_cost(300); // XXX
 8988   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
 8989             "cmpq    rax, rdx\n\t"
 8990             "jne,s   normal\n\t"
 8991             "xorl    rdx, rdx\n\t"
 8992             "cmpq    $div, -1\n\t"
 8993             "je,s    done\n"
 8994     "normal: cdqq\n\t"
 8995             "idivq   $div\n"
 8996     "done:"        %}
 8997   ins_encode(cdqq_enc(div));
 8998   ins_pipe(ialu_reg_reg_alu0);
 8999 %}
 9000 
 9001 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
 9002 %{
        // Unsigned int remainder: rdx = rax % div (UModI). rax and cr are killed.
        // Code generation is delegated to the umodI macro-assembler helper
        // (defined elsewhere), which receives rax, div and rdx.
 9003   match(Set rdx (UModI rax div));
 9004   effect(KILL rax, KILL cr);
 9005
 9006   ins_cost(300);
 9007   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
 9008   ins_encode %{
 9009     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
 9010   %}
 9011   ins_pipe(ialu_reg_reg_alu0);
 9012 %}
 9013 
 9014 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
 9015 %{
        // Unsigned long remainder: rdx = rax % div (UModL). rax and cr are killed.
        // Code generation is delegated to the umodL macro-assembler helper
        // (defined elsewhere), which receives rax, div and rdx.
 9016   match(Set rdx (UModL rax div));
 9017   effect(KILL rax, KILL cr);
 9018
 9019   ins_cost(300);
 9020   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
 9021   ins_encode %{
 9022     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
 9023   %}
 9024   ins_pipe(ialu_reg_reg_alu0);
 9025 %}
 9026 
 9027 // Integer Shift Instructions
 9028 // Shift Left by one, two, three
 9029 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
 9030 %{
        // Int shift left by a small constant (immI2 operand): dst = dst << shift.
        // Same match shape and encoding as salI_rReg_imm; only the constant
        // operand class differs.
 9031   match(Set dst (LShiftI dst shift));
 9032   effect(KILL cr);
 9033
 9034   format %{ "sall    $dst, $shift" %}
 9035   ins_encode %{
 9036     __ sall($dst$$Register, $shift$$constant);
 9037   %}
 9038   ins_pipe(ialu_reg);
 9039 %}
 9040 
 9041 // Shift Left by 8-bit immediate
 9042 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9043 %{
        // Int shift left by an 8-bit constant: dst = dst << shift.
        // cr is declared killed.
 9044   match(Set dst (LShiftI dst shift));
 9045   effect(KILL cr);
 9046
 9047   format %{ "sall    $dst, $shift" %}
 9048   ins_encode %{
 9049     __ sall($dst$$Register, $shift$$constant);
 9050   %}
 9051   ins_pipe(ialu_reg);
 9052 %}
 9053 
 9054 // Shift Left by 8-bit immediate
 9055 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9056 %{
        // Int read-modify-write shift left by a constant: [dst] = [dst] << shift.
        // The load, shift and store of the same address become one sall on memory.
 9057   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 9058   effect(KILL cr);
 9059
 9060   format %{ "sall    $dst, $shift" %}
 9061   ins_encode %{
 9062     __ sall($dst$$Address, $shift$$constant);
 9063   %}
 9064   ins_pipe(ialu_mem_imm);
 9065 %}
 9066 
 9067 // Shift Left by variable
 9068 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9069 %{
        // Int shift left by a variable count: dst = dst << shift.
        // Used only when BMI2 is unavailable. The count operand is pinned to
        // rcx (rcx_RegI); the one-operand sall form takes its count from CL.
 9070   predicate(!VM_Version::supports_bmi2());
 9071   match(Set dst (LShiftI dst shift));
 9072   effect(KILL cr);
 9073
 9074   format %{ "sall    $dst, $shift" %}
 9075   ins_encode %{
 9076     __ sall($dst$$Register);
 9077   %}
 9078   ins_pipe(ialu_reg_reg);
 9079 %}
 9080 
 9081 // Shift Left by variable
 9082 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9083 %{
        // Int read-modify-write shift left by a variable count:
        // [dst] = [dst] << shift. Used only when BMI2 is unavailable.
        // The count operand is pinned to rcx; the encoding takes it from CL.
 9084   predicate(!VM_Version::supports_bmi2());
 9085   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 9086   effect(KILL cr);
 9087
 9088   format %{ "sall    $dst, $shift" %}
 9089   ins_encode %{
 9090     __ sall($dst$$Address);
 9091   %}
 9092   ins_pipe(ialu_mem_reg);
 9093 %}
 9094 
 9095 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9096 %{
        // Int shift left by a variable count using BMI2 shlxl:
        // dst = src << shift. Three-operand form: dst need not equal src and
        // the count may be in any rRegI. No flags operand is declared.
 9097   predicate(VM_Version::supports_bmi2());
 9098   match(Set dst (LShiftI src shift));
 9099
 9100   format %{ "shlxl   $dst, $src, $shift" %}
 9101   ins_encode %{
 9102     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
 9103   %}
 9104   ins_pipe(ialu_reg_reg);
 9105 %}
 9106 
 9107 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9108 %{
        // Int shift left of a memory value by a variable count using BMI2
        // shlxl: dst = [src] << shift. The LoadI is folded into the
        // instruction's memory operand. No flags operand is declared.
 9109   predicate(VM_Version::supports_bmi2());
 9110   match(Set dst (LShiftI (LoadI src) shift));
 9111   ins_cost(175);
 9112   format %{ "shlxl   $dst, $src, $shift" %}
 9113   ins_encode %{
 9114     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
 9115   %}
 9116   ins_pipe(ialu_reg_mem);
 9117 %}
 9118 
 9119 // Arithmetic Shift Right by 8-bit immediate
 9120 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9121 %{
        // Int arithmetic shift right by an 8-bit constant: dst = dst >> shift.
        // cr is declared killed.
        // Register-only form, so it uses the ialu_reg pipe class like the
        // sibling register-immediate shifts (salI_rReg_imm, shrI_rReg_imm).
 9122   match(Set dst (RShiftI dst shift));
 9123   effect(KILL cr);
 9124
 9125   format %{ "sarl    $dst, $shift" %}
 9126   ins_encode %{
 9127     __ sarl($dst$$Register, $shift$$constant);
 9128   %}
 9129   ins_pipe(ialu_reg);
 9130 %}
 9131 
 9132 // Arithmetic Shift Right by 8-bit immediate
 9133 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9134 %{
        // Int read-modify-write arithmetic shift right by a constant:
        // [dst] = [dst] >> shift, emitted as one sarl on the memory operand.
 9135   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 9136   effect(KILL cr);
 9137
 9138   format %{ "sarl    $dst, $shift" %}
 9139   ins_encode %{
 9140     __ sarl($dst$$Address, $shift$$constant);
 9141   %}
 9142   ins_pipe(ialu_mem_imm);
 9143 %}
 9144 
 9145 // Arithmetic Shift Right by variable
 9146 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9147 %{
        // Int arithmetic shift right by a variable count: dst = dst >> shift.
        // Used only when BMI2 is unavailable. The count operand is pinned to
        // rcx (rcx_RegI); the one-operand sarl form takes its count from CL.
 9148   predicate(!VM_Version::supports_bmi2());
 9149   match(Set dst (RShiftI dst shift));
 9150   effect(KILL cr);
 9151   format %{ "sarl    $dst, $shift" %}
 9152   ins_encode %{
 9153     __ sarl($dst$$Register);
 9154   %}
 9155   ins_pipe(ialu_reg_reg);
 9156 %}
 9157 
 9158 // Arithmetic Shift Right by variable
 9159 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9160 %{
        // Int read-modify-write arithmetic shift right by a variable count:
        // [dst] = [dst] >> shift. Used only when BMI2 is unavailable.
        // The count operand is pinned to rcx; the encoding takes it from CL.
 9161   predicate(!VM_Version::supports_bmi2());
 9162   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 9163   effect(KILL cr);
 9164
 9165   format %{ "sarl    $dst, $shift" %}
 9166   ins_encode %{
 9167     __ sarl($dst$$Address);
 9168   %}
 9169   ins_pipe(ialu_mem_reg);
 9170 %}
 9171 
 9172 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9173 %{
        // Int arithmetic shift right by a variable count using BMI2 sarxl:
        // dst = src >> shift. Three-operand form: dst need not equal src and
        // the count may be in any rRegI. No flags operand is declared.
 9174   predicate(VM_Version::supports_bmi2());
 9175   match(Set dst (RShiftI src shift));
 9176
 9177   format %{ "sarxl   $dst, $src, $shift" %}
 9178   ins_encode %{
 9179     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
 9180   %}
 9181   ins_pipe(ialu_reg_reg);
 9182 %}
 9183 
 9184 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9185 %{
        // Int arithmetic shift right of a memory value by a variable count
        // using BMI2 sarxl: dst = [src] >> shift. The LoadI is folded into the
        // instruction's memory operand. No flags operand is declared.
 9186   predicate(VM_Version::supports_bmi2());
 9187   match(Set dst (RShiftI (LoadI src) shift));
 9188   ins_cost(175);
 9189   format %{ "sarxl   $dst, $src, $shift" %}
 9190   ins_encode %{
 9191     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
 9192   %}
 9193   ins_pipe(ialu_reg_mem);
 9194 %}
 9195 
 9196 // Logical Shift Right by 8-bit immediate
 9197 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9198 %{
        // Int logical (unsigned) shift right by an 8-bit constant:
        // dst = dst >>> shift. cr is declared killed.
 9199   match(Set dst (URShiftI dst shift));
 9200   effect(KILL cr);
 9201
 9202   format %{ "shrl    $dst, $shift" %}
 9203   ins_encode %{
 9204     __ shrl($dst$$Register, $shift$$constant);
 9205   %}
 9206   ins_pipe(ialu_reg);
 9207 %}
 9208 
 9209 // Logical Shift Right by 8-bit immediate
 9210 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9211 %{
        // Int read-modify-write logical shift right by a constant:
        // [dst] = [dst] >>> shift, emitted as one shrl on the memory operand.
 9212   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 9213   effect(KILL cr);
 9214
 9215   format %{ "shrl    $dst, $shift" %}
 9216   ins_encode %{
 9217     __ shrl($dst$$Address, $shift$$constant);
 9218   %}
 9219   ins_pipe(ialu_mem_imm);
 9220 %}
 9221 
 9222 // Logical Shift Right by variable
 9223 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9224 %{
        // Int logical shift right by a variable count: dst = dst >>> shift.
        // Used only when BMI2 is unavailable. The count operand is pinned to
        // rcx (rcx_RegI); the one-operand shrl form takes its count from CL.
 9225   predicate(!VM_Version::supports_bmi2());
 9226   match(Set dst (URShiftI dst shift));
 9227   effect(KILL cr);
 9228
 9229   format %{ "shrl    $dst, $shift" %}
 9230   ins_encode %{
 9231     __ shrl($dst$$Register);
 9232   %}
 9233   ins_pipe(ialu_reg_reg);
 9234 %}
 9235 
 9236 // Logical Shift Right by variable
 9237 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9238 %{
        // Int read-modify-write logical shift right by a variable count:
        // [dst] = [dst] >>> shift. Used only when BMI2 is unavailable.
        // The count operand is pinned to rcx; the encoding takes it from CL.
 9239   predicate(!VM_Version::supports_bmi2());
 9240   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 9241   effect(KILL cr);
 9242
 9243   format %{ "shrl    $dst, $shift" %}
 9244   ins_encode %{
 9245     __ shrl($dst$$Address);
 9246   %}
 9247   ins_pipe(ialu_mem_reg);
 9248 %}
 9249 
 9250 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9251 %{
        // Int logical shift right by a variable count using BMI2 shrxl:
        // dst = src >>> shift. Three-operand form: dst need not equal src and
        // the count may be in any rRegI. No flags operand is declared.
 9252   predicate(VM_Version::supports_bmi2());
 9253   match(Set dst (URShiftI src shift));
 9254
 9255   format %{ "shrxl   $dst, $src, $shift" %}
 9256   ins_encode %{
 9257     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
 9258   %}
 9259   ins_pipe(ialu_reg_reg);
 9260 %}
 9261 
 9262 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9263 %{
        // Int logical shift right of a memory value by a variable count using
        // BMI2 shrxl: dst = [src] >>> shift. The LoadI is folded into the
        // instruction's memory operand. No flags operand is declared.
 9264   predicate(VM_Version::supports_bmi2());
 9265   match(Set dst (URShiftI (LoadI src) shift));
 9266   ins_cost(175);
 9267   format %{ "shrxl   $dst, $src, $shift" %}
 9268   ins_encode %{
 9269     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
 9270   %}
 9271   ins_pipe(ialu_reg_mem);
 9272 %}
 9273 
 9274 // Long Shift Instructions
 9275 // Shift Left by one, two, three
 9276 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
 9277 %{
        // Long shift left by a small constant (immI2 operand): dst = dst << shift.
        // Same match shape and encoding as salL_rReg_imm; only the constant
        // operand class differs.
 9278   match(Set dst (LShiftL dst shift));
 9279   effect(KILL cr);
 9280
 9281   format %{ "salq    $dst, $shift" %}
 9282   ins_encode %{
 9283     __ salq($dst$$Register, $shift$$constant);
 9284   %}
 9285   ins_pipe(ialu_reg);
 9286 %}
 9287 
 9288 // Shift Left by 8-bit immediate
 9289 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 9290 %{
        // Long shift left by an 8-bit constant: dst = dst << shift.
        // cr is declared killed.
 9291   match(Set dst (LShiftL dst shift));
 9292   effect(KILL cr);
 9293
 9294   format %{ "salq    $dst, $shift" %}
 9295   ins_encode %{
 9296     __ salq($dst$$Register, $shift$$constant);
 9297   %}
 9298   ins_pipe(ialu_reg);
 9299 %}
 9300 
 9301 // Shift Left by 8-bit immediate
 9302 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9303 %{
        // Long read-modify-write shift left by a constant: [dst] = [dst] << shift.
        // The load, shift and store of the same address become one salq on memory.
 9304   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 9305   effect(KILL cr);
 9306
 9307   format %{ "salq    $dst, $shift" %}
 9308   ins_encode %{
 9309     __ salq($dst$$Address, $shift$$constant);
 9310   %}
 9311   ins_pipe(ialu_mem_imm);
 9312 %}
 9313 
 9314 // Shift Left by variable
 9315 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9316 %{
        // Long shift left by a variable count: dst = dst << shift.
        // Used only when BMI2 is unavailable. The count is an int operand pinned
        // to rcx (rcx_RegI); the one-operand salq form takes its count from CL.
 9317   predicate(!VM_Version::supports_bmi2());
 9318   match(Set dst (LShiftL dst shift));
 9319   effect(KILL cr);
 9320
 9321   format %{ "salq    $dst, $shift" %}
 9322   ins_encode %{
 9323     __ salq($dst$$Register);
 9324   %}
 9325   ins_pipe(ialu_reg_reg);
 9326 %}
 9327 
 9328 // Shift Left by variable
 9329 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9330 %{
        // Long read-modify-write shift left by a variable count:
        // [dst] = [dst] << shift. Used only when BMI2 is unavailable.
        // The count operand is pinned to rcx; the encoding takes it from CL.
 9331   predicate(!VM_Version::supports_bmi2());
 9332   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 9333   effect(KILL cr);
 9334
 9335   format %{ "salq    $dst, $shift" %}
 9336   ins_encode %{
 9337     __ salq($dst$$Address);
 9338   %}
 9339   ins_pipe(ialu_mem_reg);
 9340 %}
 9341 
 9342 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9343 %{
        // Long shift left by a variable count using BMI2 shlxq:
        // dst = src << shift. Three-operand form: dst need not equal src and
        // the int count may be in any rRegI. No flags operand is declared.
 9344   predicate(VM_Version::supports_bmi2());
 9345   match(Set dst (LShiftL src shift));
 9346
 9347   format %{ "shlxq   $dst, $src, $shift" %}
 9348   ins_encode %{
 9349     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
 9350   %}
 9351   ins_pipe(ialu_reg_reg);
 9352 %}
 9353 
 9354 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9355 %{
        // Long shift left of a memory value by a variable count using BMI2
        // shlxq: dst = [src] << shift. The LoadL is folded into the
        // instruction's memory operand. No flags operand is declared.
 9356   predicate(VM_Version::supports_bmi2());
 9357   match(Set dst (LShiftL (LoadL src) shift));
 9358   ins_cost(175);
 9359   format %{ "shlxq   $dst, $src, $shift" %}
 9360   ins_encode %{
 9361     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
 9362   %}
 9363   ins_pipe(ialu_reg_mem);
 9364 %}
 9365 
 9366 // Arithmetic Shift Right by immediate (any int constant; count masked to its low 6 bits)
 9367 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
 9368 %{
        // Long arithmetic shift right by a constant: dst = dst >> shift.
        // The operand is a full immI, so the count is masked to its low 6 bits
        // before being passed to the assembler as an unsigned char.
        // Register-only form, so it uses the ialu_reg pipe class like the
        // sibling register-immediate shifts (salL_rReg_imm, shrL_rReg_imm).
 9369   match(Set dst (RShiftL dst shift));
 9370   effect(KILL cr);
 9371
 9372   format %{ "sarq    $dst, $shift" %}
 9373   ins_encode %{
 9374     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
 9375   %}
 9376   ins_pipe(ialu_reg);
 9377 %}
 9378 
 9379 // Arithmetic Shift Right by immediate (any int constant; count masked to its low 6 bits)
 9380 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
 9381 %{
        // Long read-modify-write arithmetic shift right by a constant:
        // [dst] = [dst] >> shift, emitted as one sarq on the memory operand.
        // The operand is a full immI, so the count is masked to its low 6 bits
        // before being passed to the assembler as an unsigned char.
 9382   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 9383   effect(KILL cr);
 9384
 9385   format %{ "sarq    $dst, $shift" %}
 9386   ins_encode %{
 9387     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
 9388   %}
 9389   ins_pipe(ialu_mem_imm);
 9390 %}
 9391 
 9392 // Arithmetic Shift Right by variable
 9393 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9394 %{
        // Long arithmetic shift right by a variable count: dst = dst >> shift.
        // Used only when BMI2 is unavailable. The count is an int operand pinned
        // to rcx (rcx_RegI); the one-operand sarq form takes its count from CL.
 9395   predicate(!VM_Version::supports_bmi2());
 9396   match(Set dst (RShiftL dst shift));
 9397   effect(KILL cr);
 9398
 9399   format %{ "sarq    $dst, $shift" %}
 9400   ins_encode %{
 9401     __ sarq($dst$$Register);
 9402   %}
 9403   ins_pipe(ialu_reg_reg);
 9404 %}
 9405 
 9406 // Arithmetic Shift Right by variable
 9407 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9408 %{
        // Long read-modify-write arithmetic shift right by a variable count:
        // [dst] = [dst] >> shift. Used only when BMI2 is unavailable.
        // The count operand is pinned to rcx; the encoding takes it from CL.
 9409   predicate(!VM_Version::supports_bmi2());
 9410   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 9411   effect(KILL cr);
 9412
 9413   format %{ "sarq    $dst, $shift" %}
 9414   ins_encode %{
 9415     __ sarq($dst$$Address);
 9416   %}
 9417   ins_pipe(ialu_mem_reg);
 9418 %}
 9419 
 9420 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9421 %{
        // Long arithmetic shift right by a variable count using BMI2 sarxq:
        // dst = src >> shift. Three-operand form: dst need not equal src and
        // the int count may be in any rRegI. No flags operand is declared.
 9422   predicate(VM_Version::supports_bmi2());
 9423   match(Set dst (RShiftL src shift));
 9424
 9425   format %{ "sarxq   $dst, $src, $shift" %}
 9426   ins_encode %{
 9427     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
 9428   %}
 9429   ins_pipe(ialu_reg_reg);
 9430 %}
 9431 
 9432 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9433 %{
        // Long arithmetic shift right of a memory value by a variable count
        // using BMI2 sarxq: dst = [src] >> shift. The LoadL is folded into the
        // instruction's memory operand. No flags operand is declared.
 9434   predicate(VM_Version::supports_bmi2());
 9435   match(Set dst (RShiftL (LoadL src) shift));
 9436   ins_cost(175);
 9437   format %{ "sarxq   $dst, $src, $shift" %}
 9438   ins_encode %{
 9439     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
 9440   %}
 9441   ins_pipe(ialu_reg_mem);
 9442 %}
 9443 
 9444 // Logical Shift Right by 8-bit immediate
 9445 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 9446 %{
        // Long logical (unsigned) shift right by an 8-bit constant:
        // dst = dst >>> shift. cr is declared killed.
 9447   match(Set dst (URShiftL dst shift));
 9448   effect(KILL cr);
 9449
 9450   format %{ "shrq    $dst, $shift" %}
 9451   ins_encode %{
 9452     __ shrq($dst$$Register, $shift$$constant);
 9453   %}
 9454   ins_pipe(ialu_reg);
 9455 %}
 9456 
 9457 // Logical Shift Right by 8-bit immediate
 9458 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9459 %{
        // Long read-modify-write logical shift right by a constant:
        // [dst] = [dst] >>> shift, emitted as one shrq on the memory operand.
 9460   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 9461   effect(KILL cr);
 9462
 9463   format %{ "shrq    $dst, $shift" %}
 9464   ins_encode %{
 9465     __ shrq($dst$$Address, $shift$$constant);
 9466   %}
 9467   ins_pipe(ialu_mem_imm);
 9468 %}
 9469 
 9470 // Logical Shift Right by variable
 9471 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9472 %{
        // Long logical shift right by a variable count: dst = dst >>> shift.
        // Used only when BMI2 is unavailable. The count is an int operand pinned
        // to rcx (rcx_RegI); the one-operand shrq form takes its count from CL.
 9473   predicate(!VM_Version::supports_bmi2());
 9474   match(Set dst (URShiftL dst shift));
 9475   effect(KILL cr);
 9476
 9477   format %{ "shrq    $dst, $shift" %}
 9478   ins_encode %{
 9479     __ shrq($dst$$Register);
 9480   %}
 9481   ins_pipe(ialu_reg_reg);
 9482 %}
 9483 
 9484 // Logical Shift Right by variable
 9485 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9486 %{
        // Long read-modify-write logical shift right by a variable count:
        // [dst] = [dst] >>> shift. Used only when BMI2 is unavailable.
        // The count operand is pinned to rcx; the encoding takes it from CL.
 9487   predicate(!VM_Version::supports_bmi2());
 9488   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 9489   effect(KILL cr);
 9490
 9491   format %{ "shrq    $dst, $shift" %}
 9492   ins_encode %{
 9493     __ shrq($dst$$Address);
 9494   %}
 9495   ins_pipe(ialu_mem_reg);
 9496 %}
 9497 
 9498 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9499 %{
        // Long logical shift right by a variable count using BMI2 shrxq:
        // dst = src >>> shift. Three-operand form: dst need not equal src and
        // the int count may be in any rRegI. No flags operand is declared.
 9500   predicate(VM_Version::supports_bmi2());
 9501   match(Set dst (URShiftL src shift));
 9502
 9503   format %{ "shrxq   $dst, $src, $shift" %}
 9504   ins_encode %{
 9505     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
 9506   %}
 9507   ins_pipe(ialu_reg_reg);
 9508 %}
 9509 
 9510 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9511 %{
        // Long logical shift right of a memory value by a variable count using
        // BMI2 shrxq: dst = [src] >>> shift. The LoadL is folded into the
        // instruction's memory operand. No flags operand is declared.
 9512   predicate(VM_Version::supports_bmi2());
 9513   match(Set dst (URShiftL (LoadL src) shift));
 9514   ins_cost(175);
 9515   format %{ "shrxq   $dst, $src, $shift" %}
 9516   ins_encode %{
 9517     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
 9518   %}
 9519   ins_pipe(ialu_reg_mem);
 9520 %}
 9521 
 9522 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 9523 // This idiom is used by the compiler for the i2b bytecode.
 9524 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
 9525 %{
        // int -> byte narrowing: matches (src << 24) >> 24 (LShiftI then
        // RShiftI) and replaces both shifts with one sign-extending movsbl.
        // No flags operand is declared.
 9526   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 9527
 9528   format %{ "movsbl  $dst, $src\t# i2b" %}
 9529   ins_encode %{
 9530     __ movsbl($dst$$Register, $src$$Register);
 9531   %}
 9532   ins_pipe(ialu_reg_reg);
 9533 %}
 9534 
 9535 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 9536 // This idiom is used by the compiler for the i2s bytecode.
 9537 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
 9538 %{
        // int -> short narrowing: matches (src << 16) >> 16 (LShiftI then
        // RShiftI) and replaces both shifts with one sign-extending movswl.
        // No flags operand is declared.
 9539   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 9540
 9541   format %{ "movswl  $dst, $src\t# i2s" %}
 9542   ins_encode %{
 9543     __ movswl($dst$$Register, $src$$Register);
 9544   %}
 9545   ins_pipe(ialu_reg_reg);
 9546 %}
 9547 
 9548 // ROL/ROR instructions
 9549 
 9550 // Rotate left by constant.
 9551 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
 9552 %{
        // Int rotate left by a constant, non-BMI2 path: dst = rotl(dst, shift).
        // RotateLeft is matched for both int and long nodes; the predicate's
        // basic_type check selects the int case. cr is declared killed.
 9553   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9554   match(Set dst (RotateLeft dst shift));
 9555   effect(KILL cr);
 9556   format %{ "roll    $dst, $shift" %}
 9557   ins_encode %{
 9558     __ roll($dst$$Register, $shift$$constant);
 9559   %}
 9560   ins_pipe(ialu_reg);
 9561 %}
 9562 
 9563 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
 9564 %{
        // Int rotate left by a constant, BMI2 path: dst = rotl(src, shift).
        // Emitted as a right rotate (rorxl) by 32 - (shift & 31), which is the
        // same rotation. No flags operand is declared.
        // NOTE(review): when (shift & 31) == 0 the immediate passed is 32 --
        // presumably unreachable or masked by the hardware; confirm.
 9565   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9566   match(Set dst (RotateLeft src shift));
 9567   format %{ "rolxl   $dst, $src, $shift" %}
 9568   ins_encode %{
 9569     int shift = 32 - ($shift$$constant & 31);
 9570     __ rorxl($dst$$Register, $src$$Register, shift);
 9571   %}
 9572   ins_pipe(ialu_reg_reg);
 9573 %}
 9574 
 9575 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
 9576 %{
        // Int rotate left of a memory value by a constant, BMI2 path:
        // dst = rotl([src], shift). Emitted as rorxl by 32 - (shift & 31) with
        // the LoadI folded into the memory operand. No flags operand is declared.
        // NOTE(review): when (shift & 31) == 0 the immediate passed is 32 --
        // presumably unreachable or masked by the hardware; confirm.
 9577   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9578   match(Set dst (RotateLeft (LoadI src) shift));
 9579   ins_cost(175);
 9580   format %{ "rolxl   $dst, $src, $shift" %}
 9581   ins_encode %{
 9582     int shift = 32 - ($shift$$constant & 31);
 9583     __ rorxl($dst$$Register, $src$$Address, shift);
 9584   %}
 9585   ins_pipe(ialu_reg_mem);
 9586 %}
 9587 
 9588 // Rotate Left by variable
 9589 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9590 %{
        // Int rotate left by a variable count: dst = rotl(dst, shift).
        // The predicate checks only the node type (no BMI2 condition).
        // The count operand is pinned to rcx; the one-operand roll form takes
        // its count from CL. cr is declared killed.
 9591   predicate(n->bottom_type()->basic_type() == T_INT);
 9592   match(Set dst (RotateLeft dst shift));
 9593   effect(KILL cr);
 9594   format %{ "roll    $dst, $shift" %}
 9595   ins_encode %{
 9596     __ roll($dst$$Register);
 9597   %}
 9598   ins_pipe(ialu_reg_reg);
 9599 %}
 9600 
 9601 // Rotate Right by constant.
 9602 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
 9603 %{
        // Int rotate right by a constant, non-BMI2 path: dst = rotr(dst, shift).
        // The predicate's basic_type check selects the int RotateRight nodes.
        // cr is declared killed.
 9604   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9605   match(Set dst (RotateRight dst shift));
 9606   effect(KILL cr);
 9607   format %{ "rorl    $dst, $shift" %}
 9608   ins_encode %{
 9609     __ rorl($dst$$Register, $shift$$constant);
 9610   %}
 9611   ins_pipe(ialu_reg);
 9612 %}
 9613 
 9614 // Rotate Right by constant.
 9615 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
 9616 %{
 9617   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9618   match(Set dst (RotateRight src shift));
 9619   format %{ "rorxl   $dst, $src, $shift" %}
 9620   ins_encode %{
 9621     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
 9622   %}
 9623   ins_pipe(ialu_reg_reg);
 9624 %}
 9625 
 9626 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
 9627 %{
 9628   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9629   match(Set dst (RotateRight (LoadI src) shift));
 9630   ins_cost(175);
 9631   format %{ "rorxl   $dst, $src, $shift" %}
 9632   ins_encode %{
 9633     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
 9634   %}
 9635   ins_pipe(ialu_reg_mem);
 9636 %}
 9637 
 9638 // Rotate Right by variable
 9639 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9640 %{
 9641   predicate(n->bottom_type()->basic_type() == T_INT);
 9642   match(Set dst (RotateRight dst shift));
 9643   effect(KILL cr);
 9644   format %{ "rorl    $dst, $shift" %}
 9645   ins_encode %{
 9646     __ rorl($dst$$Register);
 9647   %}
 9648   ins_pipe(ialu_reg_reg);
 9649 %}
 9650 
 9651 // Rotate Left by constant.
 9652 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
 9653 %{
 9654   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9655   match(Set dst (RotateLeft dst shift));
 9656   effect(KILL cr);
 9657   format %{ "rolq    $dst, $shift" %}
 9658   ins_encode %{
 9659     __ rolq($dst$$Register, $shift$$constant);
 9660   %}
 9661   ins_pipe(ialu_reg);
 9662 %}
 9663 
 9664 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
 9665 %{
 9666   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9667   match(Set dst (RotateLeft src shift));
 9668   format %{ "rolxq   $dst, $src, $shift" %}
 9669   ins_encode %{
 9670     int shift = 64 - ($shift$$constant & 63);
 9671     __ rorxq($dst$$Register, $src$$Register, shift);
 9672   %}
 9673   ins_pipe(ialu_reg_reg);
 9674 %}
 9675 
 9676 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
 9677 %{
 9678   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9679   match(Set dst (RotateLeft (LoadL src) shift));
 9680   ins_cost(175);
 9681   format %{ "rolxq   $dst, $src, $shift" %}
 9682   ins_encode %{
 9683     int shift = 64 - ($shift$$constant & 63);
 9684     __ rorxq($dst$$Register, $src$$Address, shift);
 9685   %}
 9686   ins_pipe(ialu_reg_mem);
 9687 %}
 9688 
 9689 // Rotate Left by variable
 9690 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9691 %{
 9692   predicate(n->bottom_type()->basic_type() == T_LONG);
 9693   match(Set dst (RotateLeft dst shift));
 9694   effect(KILL cr);
 9695   format %{ "rolq    $dst, $shift" %}
 9696   ins_encode %{
 9697     __ rolq($dst$$Register);
 9698   %}
 9699   ins_pipe(ialu_reg_reg);
 9700 %}
 9701 
 9702 // Rotate Right by constant.
 9703 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
 9704 %{
 9705   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9706   match(Set dst (RotateRight dst shift));
 9707   effect(KILL cr);
 9708   format %{ "rorq    $dst, $shift" %}
 9709   ins_encode %{
 9710     __ rorq($dst$$Register, $shift$$constant);
 9711   %}
 9712   ins_pipe(ialu_reg);
 9713 %}
 9714 
 9715 // Rotate Right by constant
 9716 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
 9717 %{
 9718   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9719   match(Set dst (RotateRight src shift));
 9720   format %{ "rorxq   $dst, $src, $shift" %}
 9721   ins_encode %{
 9722     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
 9723   %}
 9724   ins_pipe(ialu_reg_reg);
 9725 %}
 9726 
 9727 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
 9728 %{
 9729   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9730   match(Set dst (RotateRight (LoadL src) shift));
 9731   ins_cost(175);
 9732   format %{ "rorxq   $dst, $src, $shift" %}
 9733   ins_encode %{
 9734     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
 9735   %}
 9736   ins_pipe(ialu_reg_mem);
 9737 %}
 9738 
 9739 // Rotate Right by variable
 9740 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9741 %{
 9742   predicate(n->bottom_type()->basic_type() == T_LONG);
 9743   match(Set dst (RotateRight dst shift));
 9744   effect(KILL cr);
 9745   format %{ "rorq    $dst, $shift" %}
 9746   ins_encode %{
 9747     __ rorq($dst$$Register);
 9748   %}
 9749   ins_pipe(ialu_reg_reg);
 9750 %}
 9751 
 9752 //----------------------------- CompressBits/ExpandBits ------------------------
 9753 
 9754 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
 9755   predicate(n->bottom_type()->isa_long());
 9756   match(Set dst (CompressBits src mask));
 9757   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
 9758   ins_encode %{
 9759     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
 9760   %}
 9761   ins_pipe( pipe_slow );
 9762 %}
 9763 
 9764 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
 9765   predicate(n->bottom_type()->isa_long());
 9766   match(Set dst (ExpandBits src mask));
 9767   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
 9768   ins_encode %{
 9769     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
 9770   %}
 9771   ins_pipe( pipe_slow );
 9772 %}
 9773 
 9774 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
 9775   predicate(n->bottom_type()->isa_long());
 9776   match(Set dst (CompressBits src (LoadL mask)));
 9777   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
 9778   ins_encode %{
 9779     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
 9780   %}
 9781   ins_pipe( pipe_slow );
 9782 %}
 9783 
 9784 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
 9785   predicate(n->bottom_type()->isa_long());
 9786   match(Set dst (ExpandBits src (LoadL mask)));
 9787   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
 9788   ins_encode %{
 9789     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
 9790   %}
 9791   ins_pipe( pipe_slow );
 9792 %}
 9793 
 9794 
 9795 // Logical Instructions
 9796 
 9797 // Integer Logical Instructions
 9798 
 9799 // And Instructions
 9800 // And Register with Register
 9801 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9802 %{
 9803   match(Set dst (AndI dst src));
 9804   effect(KILL cr);
 9805 
 9806   format %{ "andl    $dst, $src\t# int" %}
 9807   ins_encode %{
 9808     __ andl($dst$$Register, $src$$Register);
 9809   %}
 9810   ins_pipe(ialu_reg_reg);
 9811 %}
 9812 
 9813 // And Register with Immediate 255
 9814 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
 9815 %{
 9816   match(Set dst (AndI src mask));
 9817 
 9818   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
 9819   ins_encode %{
 9820     __ movzbl($dst$$Register, $src$$Register);
 9821   %}
 9822   ins_pipe(ialu_reg);
 9823 %}
 9824 
 9825 // And Register with Immediate 255 and promote to long
 9826 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
 9827 %{
 9828   match(Set dst (ConvI2L (AndI src mask)));
 9829 
 9830   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
 9831   ins_encode %{
 9832     __ movzbl($dst$$Register, $src$$Register);
 9833   %}
 9834   ins_pipe(ialu_reg);
 9835 %}
 9836 
 9837 // And Register with Immediate 65535
 9838 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
 9839 %{
 9840   match(Set dst (AndI src mask));
 9841 
 9842   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
 9843   ins_encode %{
 9844     __ movzwl($dst$$Register, $src$$Register);
 9845   %}
 9846   ins_pipe(ialu_reg);
 9847 %}
 9848 
 9849 // And Register with Immediate 65535 and promote to long
 9850 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
 9851 %{
 9852   match(Set dst (ConvI2L (AndI src mask)));
 9853 
 9854   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
 9855   ins_encode %{
 9856     __ movzwl($dst$$Register, $src$$Register);
 9857   %}
 9858   ins_pipe(ialu_reg);
 9859 %}
 9860 
 9861 // Can skip int2long conversions after AND with small bitmask
 9862 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
 9863 %{
 9864   predicate(VM_Version::supports_bmi2());
 9865   ins_cost(125);
 9866   effect(TEMP tmp, KILL cr);
 9867   match(Set dst (ConvI2L (AndI src mask)));
 9868   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
 9869   ins_encode %{
 9870     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
 9871     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
 9872   %}
 9873   ins_pipe(ialu_reg_reg);
 9874 %}
 9875 
 9876 // And Register with Immediate
 9877 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9878 %{
 9879   match(Set dst (AndI dst src));
 9880   effect(KILL cr);
 9881 
 9882   format %{ "andl    $dst, $src\t# int" %}
 9883   ins_encode %{
 9884     __ andl($dst$$Register, $src$$constant);
 9885   %}
 9886   ins_pipe(ialu_reg);
 9887 %}
 9888 
 9889 // And Register with Memory
 9890 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9891 %{
 9892   match(Set dst (AndI dst (LoadI src)));
 9893   effect(KILL cr);
 9894 
 9895   ins_cost(150);
 9896   format %{ "andl    $dst, $src\t# int" %}
 9897   ins_encode %{
 9898     __ andl($dst$$Register, $src$$Address);
 9899   %}
 9900   ins_pipe(ialu_reg_mem);
 9901 %}
 9902 
 9903 // And Memory with Register
 9904 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9905 %{
 9906   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
 9907   effect(KILL cr);
 9908 
 9909   ins_cost(150);
 9910   format %{ "andb    $dst, $src\t# byte" %}
 9911   ins_encode %{
 9912     __ andb($dst$$Address, $src$$Register);
 9913   %}
 9914   ins_pipe(ialu_mem_reg);
 9915 %}
 9916 
 9917 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9918 %{
 9919   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 9920   effect(KILL cr);
 9921 
 9922   ins_cost(150);
 9923   format %{ "andl    $dst, $src\t# int" %}
 9924   ins_encode %{
 9925     __ andl($dst$$Address, $src$$Register);
 9926   %}
 9927   ins_pipe(ialu_mem_reg);
 9928 %}
 9929 
 9930 // And Memory with Immediate
 9931 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9932 %{
 9933   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 9934   effect(KILL cr);
 9935 
 9936   ins_cost(125);
 9937   format %{ "andl    $dst, $src\t# int" %}
 9938   ins_encode %{
 9939     __ andl($dst$$Address, $src$$constant);
 9940   %}
 9941   ins_pipe(ialu_mem_imm);
 9942 %}
 9943 
 9944 // BMI1 instructions
 9945 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
 9946   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
 9947   predicate(UseBMI1Instructions);
 9948   effect(KILL cr);
 9949 
 9950   ins_cost(125);
 9951   format %{ "andnl  $dst, $src1, $src2" %}
 9952 
 9953   ins_encode %{
 9954     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 9955   %}
 9956   ins_pipe(ialu_reg_mem);
 9957 %}
 9958 
 9959 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
 9960   match(Set dst (AndI (XorI src1 minus_1) src2));
 9961   predicate(UseBMI1Instructions);
 9962   effect(KILL cr);
 9963 
 9964   format %{ "andnl  $dst, $src1, $src2" %}
 9965 
 9966   ins_encode %{
 9967     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 9968   %}
 9969   ins_pipe(ialu_reg);
 9970 %}
 9971 
 9972 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
 9973   match(Set dst (AndI (SubI imm_zero src) src));
 9974   predicate(UseBMI1Instructions);
 9975   effect(KILL cr);
 9976 
 9977   format %{ "blsil  $dst, $src" %}
 9978 
 9979   ins_encode %{
 9980     __ blsil($dst$$Register, $src$$Register);
 9981   %}
 9982   ins_pipe(ialu_reg);
 9983 %}
 9984 
 9985 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
 9986   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 9987   predicate(UseBMI1Instructions);
 9988   effect(KILL cr);
 9989 
 9990   ins_cost(125);
 9991   format %{ "blsil  $dst, $src" %}
 9992 
 9993   ins_encode %{
 9994     __ blsil($dst$$Register, $src$$Address);
 9995   %}
 9996   ins_pipe(ialu_reg_mem);
 9997 %}
 9998 
 9999 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
10000 %{
10001   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
10002   predicate(UseBMI1Instructions);
10003   effect(KILL cr);
10004 
10005   ins_cost(125);
10006   format %{ "blsmskl $dst, $src" %}
10007 
10008   ins_encode %{
10009     __ blsmskl($dst$$Register, $src$$Address);
10010   %}
10011   ins_pipe(ialu_reg_mem);
10012 %}
10013 
// Int (src + -1) ^ src, register form, emitted as blsmskl.
// Enabled by UseBMI1Instructions; flags are declared killed.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (XorI (AddI src minus_1) src));
  effect(KILL cr);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsmskl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10028 
// Int (src + -1) & src, register form, emitted as blsrl.
// Enabled by UseBMI1Instructions; flags are declared killed.
//
// This rule has no memory operand, so it uses the register-only pipeline
// class ialu_reg, like the blsiI/blsmskI register forms with the same operand
// list (it previously named ialu_reg_mem).
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
10043 
// Int (LoadI(src) + -1) & LoadI(src), memory form, emitted as blsrl with a
// memory operand. Enabled by UseBMI1Instructions; flags are declared killed.
//
// This rule reads its source from memory, so it uses the register/memory
// pipeline class ialu_reg_mem, like the blsiI/blsmskI memory forms with the
// same operand list (it previously named ialu_reg).
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
10059 
10060 // Or Instructions
// Int OR, register with register: dst = dst | src.
// The flags register is declared killed.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (OrI dst src));

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ orl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
10073 
// Int OR, register with immediate: dst = dst | src (immI constant).
// The flags register is declared killed.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (OrI dst src));

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orl(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10086 
// Int OR, register with memory: dst = dst | LoadI(src).
// The flags register is declared killed.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (OrI dst (LoadI src)));

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10100 
// Byte OR, memory with register: matches a StoreB of (LoadB dst) | src back
// to the same address and emits a single orb on memory.
// The flags register is declared killed.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));

  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ orb($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10114 
// Int OR, memory with register: matches a StoreI of (LoadI dst) | src back
// to the same address and emits a single orl on memory.
// The flags register is declared killed.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ orl($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10127 
// Int OR, memory with immediate: matches a StoreI of (LoadI dst) | src
// (immI constant) back to the same address and emits a single orl on memory.
// The flags register is declared killed.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  ins_cost(125);
  effect(KILL cr);
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address,
           $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10141 
10142 // Xor Instructions
// Int XOR, register with register: dst = dst ^ src.
// The flags register is declared killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (XorI dst src));

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ xorl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
10155 
// Int XOR with the constant -1 (immI_M1), emitted as notl on dst.
// No flags operand is declared for this rule.
//
// The format string names the 32-bit mnemonic actually emitted ("notl"),
// matching the long variant's "notq"; the column width is unchanged.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  format %{ "notl   $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10166 
// Int XOR, register with immediate: dst = dst ^ src (immI constant).
// The flags register is declared killed.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (XorI dst src));

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorl(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10179 
// Int XOR, register with memory: dst = dst ^ LoadI(src).
// The flags register is declared killed.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (XorI dst (LoadI src)));

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10193 
// Byte XOR, memory with register: matches a StoreB of (LoadB dst) ^ src back
// to the same address and emits a single xorb on memory.
// The flags register is declared killed.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));

  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ xorb($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10207 
// Int XOR, memory with register: matches a StoreI of (LoadI dst) ^ src back
// to the same address and emits a single xorl on memory.
// The flags register is declared killed.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ xorl($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10220 
// Int XOR, memory with immediate: matches a StoreI of (LoadI dst) ^ src
// (immI constant) back to the same address and emits a single xorl on memory.
// The flags register is declared killed.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  ins_cost(125);
  effect(KILL cr);
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address,
            $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10234 
10235 
10236 // Long Logical Instructions
10237 
10238 // And Instructions
// Long AND, register with register: dst = dst & src.
// The flags register is declared killed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (AndL dst src));

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ andq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
10251 
// Long AND with the constant 255 (immL_255), emitted as movzbl from src
// into dst. No flags operand is declared for this rule.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));
  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    // movzbl zeroes out the upper 32-bit and does not need REX.W
    __ movzbl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10264 
// Long AND with the constant 65535 (immL_65535), emitted as movzwl from src
// into dst. No flags operand is declared for this rule.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));
  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    // movzwl zeroes out the upper 32-bit and does not need REX.W
    __ movzwl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10277 
// Long AND, register with immediate: dst = dst & src (immL32 constant).
// The flags register is declared killed.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (AndL dst src));

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ andq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10290 
// Long AND, register with memory: dst = dst & LoadL(src).
// The flags register is declared killed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (AndL dst (LoadL src)));

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ andq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10304 
// Long AND, memory with register: matches a StoreL of (LoadL dst) & src back
// to the same address and emits a single andq on memory.
// The flags register is declared killed.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ andq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10318 
// Long AND, memory with immediate: matches a StoreL of (LoadL dst) & src
// (immL32 constant) back to the same address and emits a single andq on
// memory. The flags register is declared killed.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  ins_cost(125);
  effect(KILL cr);
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address,
            $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10332 
// Long AND of a memory location with an immL_NotPow2 constant, emitted as a
// single btrq that resets bit log2(~con) in memory.
// The flags register is declared killed.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));

  // con should be a pure 64-bit immediate given that not(con) is a power of 2
  // because AND/OR works well enough for 8/32-bit values.
  // The predicate therefore only accepts constants whose cleared bit has an
  // index greater than 30.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    // Index of the single zero bit in con.
    int bit_index = log2i_exact((julong)~$con$$constant);
    __ btrq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
10349 
10350 // BMI1 instructions
// Long and-not with a memory operand: matches (src1 ^ -1) & LoadL(src2) and
// emits andnq. Enabled by UseBMI1Instructions; flags are declared killed.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  effect(KILL cr);
  ins_cost(125);

  format %{ "andnq  $dst, $src1, $src2" %}
  ins_encode %{
    Register Rdst  = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    __ andnq(Rdst, Rsrc1, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10364 
// Long and-not, all-register form: matches (src1 ^ -1) & src2 and emits
// andnq. Enabled by UseBMI1Instructions; flags are declared killed.
//
// This rule has no memory operand, so it uses the same register-only pipeline
// class (ialu_reg) as the int variant andnI_rReg_rReg_rReg (it previously
// named ialu_reg_mem).
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10377 
// Long (0 - src) & src, register form, emitted as blsiq.
// Enabled by UseBMI1Instructions; flags are declared killed.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (AndL (SubL imm_zero src) src));
  effect(KILL cr);

  format %{ "blsiq  $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsiq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10390 
// Long (0 - LoadL(src)) & LoadL(src), memory form, emitted as blsiq with a
// memory operand. Enabled by UseBMI1Instructions; flags are declared killed.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  effect(KILL cr);
  ins_cost(125);

  format %{ "blsiq  $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ blsiq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10404 
// Long (LoadL(src) + -1) ^ LoadL(src), memory form, emitted as blsmskq with a
// memory operand. Enabled by UseBMI1Instructions; flags are declared killed.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  effect(KILL cr);
  ins_cost(125);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ blsmskq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10419 
// Long (src + -1) ^ src, register form, emitted as blsmskq.
// Enabled by UseBMI1Instructions; flags are declared killed.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (XorL (AddL src minus_1) src));
  effect(KILL cr);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsmskq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10434 
// Long (src + -1) & src, register form, emitted as blsrq.
// Enabled by UseBMI1Instructions; flags are declared killed.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (AndL (AddL src minus_1) src) );
  effect(KILL cr);

  format %{ "blsrq  $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsrq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10449 
// Long (LoadL(src) + -1) & LoadL(src), memory form, emitted as blsrq with a
// memory operand. Enabled by UseBMI1Instructions; flags are declared killed.
//
// This rule reads its source from memory, so it uses the register/memory
// pipeline class ialu_reg_mem, like the blsiL/blsmskL memory forms with the
// same operand list (it previously named ialu_reg).
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
10465 
10466 // Or Instructions
// Long OR, register with register: dst = dst | src.
// The flags register is declared killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (OrL dst src));

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ orq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
10479 
// Long OR of dst with a pointer reinterpreted as an integer (CastP2X src).
// Use any_RegP to match R15 (TLS register) without spilling.
// The flags register is declared killed.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (OrL dst (CastP2X src)));

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rptr = $src$$Register;
    __ orq(Rdst, Rptr);
  %}
  ins_pipe(ialu_reg_reg);
%}
10491 
10492 
// Long OR, register with immediate: dst = dst | src (immL32 constant).
// The flags register is declared killed.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (OrL dst src));

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10505 
// Long OR, register with memory: dst = dst | LoadL(src).
// The flags register is declared killed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (OrL dst (LoadL src)));

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10519 
// Long OR, memory with register: matches a StoreL of (LoadL dst) | src back
// to the same address and emits a single orq on memory.
// The flags register is declared killed.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ orq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10533 
// Long OR, memory with immediate: matches a StoreL of (LoadL dst) | src
// (immL32 constant) back to the same address and emits a single orq on
// memory. The flags register is declared killed.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  ins_cost(125);
  effect(KILL cr);
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address,
           $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10547 
// Long OR of a memory location with an immL_Pow2 constant, emitted as a
// single btsq that sets bit log2(con) in memory.
// The flags register is declared killed.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) con)));

  // con should be a pure 64-bit power of 2 immediate
  // because AND/OR works well enough for 8/32-bit values.
  // The predicate therefore only accepts constants whose set bit has an
  // index greater than 31.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    // Index of the single one bit in con.
    int bit_index = log2i_exact((julong)$con$$constant);
    __ btsq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
10564 
10565 // Xor Instructions
// Long XOR, register with register: dst = dst ^ src.
// The flags register is declared killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (XorL dst src));

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ xorq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
10578 
// Long XOR with the constant -1 (immL_M1), emitted as notq on dst.
// No flags operand is declared for this rule.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  match(Set dst (XorL dst imm));
  format %{ "notq   $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ notq(Rdst);
  %}
  ins_pipe(ialu_reg);
%}
10589 
// Long XOR, register with immediate: dst = dst ^ src (immL32 constant).
// The flags register is declared killed.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  effect(KILL cr);
  match(Set dst (XorL dst src));

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10602 
// Long XOR, register with memory: dst = dst ^ LoadL(src).
// The flags register is declared killed.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (XorL dst (LoadL src)));

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10616 
// Long XOR, memory with register: matches a StoreL of (LoadL dst) ^ src back
// to the same address and emits a single xorq on memory.
// The flags register is declared killed.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  ins_cost(150);
  effect(KILL cr);
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ xorq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10630 
// Long XOR, memory with immediate: matches a StoreL of (LoadL dst) ^ src
// (immL32 constant) back to the same address and emits a single xorq on
// memory. The flags register is declared killed.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  ins_cost(125);
  effect(KILL cr);
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address,
            $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10644 
// CmpLTMask of two int registers, materialized in dst with a four-instruction
// sequence: compare p with q, set the low byte of dst on "less", zero-extend
// that byte, then negate it. The flags register is declared killed.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  ins_cost(400);
  effect(KILL cr);
  match(Set dst (CmpLTMask p q));

  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ cmpl($p$$Register, $q$$Register);
    __ setb(Assembler::less, Rdst);
    __ movzbl(Rdst, Rdst);
    __ negl(Rdst);
  %}
  ins_pipe(pipe_slow);
%}
10663 
10664 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
10665 %{
        // CmpLTMask of dst against the immI_0 operand: a single arithmetic
        // right shift by 31 is emitted instead of the compare sequence
        // (cost 100 versus 400 for cmpLTMask).
10666   match(Set dst (CmpLTMask dst zero));
10667   effect(KILL cr);
10668 
10669   ins_cost(100);
10670   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10671   ins_encode %{
10672     __ sarl($dst$$Register, 31);
10673   %}
10674   ins_pipe(ialu_reg);
10675 %}
10676 
10677 /* Better to save a register than avoid a branch */
10678 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10679 %{
        // Conditional add: p = (p - q) + ((p < q) ? y : 0), implemented with a
        // short forward branch rather than by materializing the mask.
10680   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10681   effect(KILL cr);
10682   ins_cost(300);
10683   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
10684             "jge     done\n\t"
10685             "addl    $p,$y\n"
10686             "done:   " %}
10687   ins_encode %{
10691     Label skip_add;
          // The subtraction both produces p - q and sets the flags tested below.
10692     __ subl($p$$Register, $q$$Register);
10693     __ jccb(Assembler::greaterEqual, skip_add);
10694     __ addl($p$$Register, $y$$Register);
10695     __ bind(skip_add);
10696   %}
10697   ins_pipe(pipe_cmplt);
10698 %}
10699 
10700 /* Better to save a register than avoid a branch */
10701 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10702 %{
        // Masked select: y is kept when the less-than condition holds for
        // p versus q and is cleared to zero otherwise.
10703   match(Set y (AndI (CmpLTMask p q) y));
10704   effect(KILL cr);
10705 
10706   ins_cost(300);
10707 
10708   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
10709             "jlt     done\n\t"
10710             "xorl    $y, $y\n"
10711             "done:   " %}
10712   ins_encode %{
10716     Label keep_y;
10717     __ cmpl($p$$Register, $q$$Register);
10718     __ jccb(Assembler::less, keep_y);
          // Not less: the mask would be zero, so clear y.
10719     __ xorl($y$$Register, $y$$Register);
10720     __ bind(keep_y);
10721   %}
10722   ins_pipe(pipe_cmplt);
10723 %}
10724 
10725 
10726 //---------- FP Instructions------------------------------------------------
10727 
10728 // Really expensive, avoid
10729 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10730 %{
        // Float compare into an rFlagsRegU result: ucomiss followed by
        // emit_cmpfp_fixup() (defined outside this section). The format text
        // describes the fixup as skipped when parity is clear, otherwise
        // pushfq / andq on the saved flags / popfq ("saw NaN, set CF").
10731   match(Set cr (CmpF src1 src2));
10732 
10733   ins_cost(500);
10734   format %{ "ucomiss $src1, $src2\n\t"
10735             "jnp,s   exit\n\t"
10736             "pushfq\t# saw NaN, set CF\n\t"
10737             "andq    [rsp], #0xffffff2b\n\t"
10738             "popfq\n"
10739     "exit:" %}
10740   ins_encode %{
10741     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10742     emit_cmpfp_fixup(_masm);
10743   %}
10744   ins_pipe(pipe_slow);
10745 %}
10746 
10747 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
        // Float compare into an rFlagsRegUCF result: a bare ucomiss with no
        // fixup sequence, at cost 100 versus 500 for cmpF_cc_reg.
10748   match(Set cr (CmpF src1 src2));
10749 
10750   ins_cost(100);
10751   format %{ "ucomiss $src1, $src2" %}
10752   ins_encode %{
10753     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10754   %}
10755   ins_pipe(pipe_slow);
10756 %}
10757 
10758 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
        // Same as cmpF_cc_reg_CF, but the second input is a LoadF that is
        // matched into ucomiss's memory operand.
10759   match(Set cr (CmpF src1 (LoadF src2)));
10760 
10761   ins_cost(100);
10762   format %{ "ucomiss $src1, $src2" %}
10763   ins_encode %{
10764     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10765   %}
10766   ins_pipe(pipe_slow);
10767 %}
10768 
10769 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
        // Float compare against a constant; the constant is addressed through
        // $constantaddress($con) (the constant table, per the format text).
10770   match(Set cr (CmpF src con));
10771   ins_cost(100);
10772   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10773   ins_encode %{
10774     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10775   %}
10776   ins_pipe(pipe_slow);
10777 %}
10778 
10779 // Really expensive, avoid
10780 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10781 %{
        // Double compare into an rFlagsRegU result: ucomisd followed by
        // emit_cmpfp_fixup() (defined outside this section). The format text
        // describes the fixup as skipped when parity is clear, otherwise
        // pushfq / andq on the saved flags / popfq ("saw NaN, set CF").
10782   match(Set cr (CmpD src1 src2));
10783 
10784   ins_cost(500);
10785   format %{ "ucomisd $src1, $src2\n\t"
10786             "jnp,s   exit\n\t"
10787             "pushfq\t# saw NaN, set CF\n\t"
10788             "andq    [rsp], #0xffffff2b\n\t"
10789             "popfq\n"
10790     "exit:" %}
10791   ins_encode %{
10792     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10793     emit_cmpfp_fixup(_masm);
10794   %}
10795   ins_pipe(pipe_slow);
10796 %}
10797 
10798 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
        // Double compare into an rFlagsRegUCF result: a bare ucomisd with no
        // fixup sequence, at cost 100 versus 500 for cmpD_cc_reg.
        // The format text previously ended in a stray " test"; it now matches
        // the emitted instruction and the sibling cmpF_cc_reg_CF format.
10799   match(Set cr (CmpD src1 src2));
10800 
10801   ins_cost(100);
10802   format %{ "ucomisd $src1, $src2" %}
10803   ins_encode %{
10804     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10805   %}
10806   ins_pipe(pipe_slow);
10807 %}
10808 
10809 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
        // Same as cmpD_cc_reg_CF, but the second input is a LoadD that is
        // matched into ucomisd's memory operand.
10810   match(Set cr (CmpD src1 (LoadD src2)));
10811 
10812   ins_cost(100);
10813   format %{ "ucomisd $src1, $src2" %}
10814   ins_encode %{
10815     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10816   %}
10817   ins_pipe(pipe_slow);
10818 %}
10819 
10820 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
        // Double compare against a constant; the constant is addressed through
        // $constantaddress($con) (the constant table, per the format text).
10821   match(Set cr (CmpD src con));
10822   ins_cost(100);
10823   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10824   ins_encode %{
10825     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10826   %}
10827   ins_pipe(pipe_slow);
10828 %}
10829 
10830 // Compare into -1,0,1
10831 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10832 %{
        // CmpF3 on two float registers: ucomiss followed by emit_cmpfp3()
        // (defined outside this section). Per the format text, dst starts at
        // -1 and is kept on jp/jb; otherwise it becomes the setne result.
10833   match(Set dst (CmpF3 src1 src2));
10834   effect(KILL cr);
10835 
10836   ins_cost(275);
10837   format %{ "ucomiss $src1, $src2\n\t"
10838             "movl    $dst, #-1\n\t"
10839             "jp,s    done\n\t"
10840             "jb,s    done\n\t"
10841             "setne   $dst\n\t"
10842             "movzbl  $dst, $dst\n"
10843     "done:" %}
10844   ins_encode %{
10845     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10846     emit_cmpfp3(_masm, $dst$$Register);
10847   %}
10848   ins_pipe(pipe_slow);
10849 %}
10850 
10851 // Compare into -1,0,1
10852 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10853 %{
        // CmpF3 with the second input loaded from memory (LoadF matched into
        // ucomiss's memory operand), followed by emit_cmpfp3() as in cmpF_reg.
10854   match(Set dst (CmpF3 src1 (LoadF src2)));
10855   effect(KILL cr);
10856 
10857   ins_cost(275);
10858   format %{ "ucomiss $src1, $src2\n\t"
10859             "movl    $dst, #-1\n\t"
10860             "jp,s    done\n\t"
10861             "jb,s    done\n\t"
10862             "setne   $dst\n\t"
10863             "movzbl  $dst, $dst\n"
10864     "done:" %}
10865   ins_encode %{
10866     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10867     emit_cmpfp3(_masm, $dst$$Register);
10868   %}
10869   ins_pipe(pipe_slow);
10870 %}
10871 
10872 // Compare into -1,0,1
10873 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
        // CmpF3 against a float constant addressed through
        // $constantaddress($con), followed by emit_cmpfp3() as in cmpF_reg.
10874   match(Set dst (CmpF3 src con));
10875   effect(KILL cr);
10876 
10877   ins_cost(275);
10878   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10879             "movl    $dst, #-1\n\t"
10880             "jp,s    done\n\t"
10881             "jb,s    done\n\t"
10882             "setne   $dst\n\t"
10883             "movzbl  $dst, $dst\n"
10884     "done:" %}
10885   ins_encode %{
10886     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10887     emit_cmpfp3(_masm, $dst$$Register);
10888   %}
10889   ins_pipe(pipe_slow);
10890 %}
10891 
10892 // Compare into -1,0,1
10893 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10894 %{
        // CmpD3 on two double registers: ucomisd followed by emit_cmpfp3()
        // (defined outside this section). Per the format text, dst starts at
        // -1 and is kept on jp/jb; otherwise it becomes the setne result.
10895   match(Set dst (CmpD3 src1 src2));
10896   effect(KILL cr);
10897 
10898   ins_cost(275);
10899   format %{ "ucomisd $src1, $src2\n\t"
10900             "movl    $dst, #-1\n\t"
10901             "jp,s    done\n\t"
10902             "jb,s    done\n\t"
10903             "setne   $dst\n\t"
10904             "movzbl  $dst, $dst\n"
10905     "done:" %}
10906   ins_encode %{
10907     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10908     emit_cmpfp3(_masm, $dst$$Register);
10909   %}
10910   ins_pipe(pipe_slow);
10911 %}
10912 
10913 // Compare into -1,0,1
10914 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10915 %{
        // CmpD3 with the second input loaded from memory (LoadD matched into
        // ucomisd's memory operand), followed by emit_cmpfp3() as in cmpD_reg.
10916   match(Set dst (CmpD3 src1 (LoadD src2)));
10917   effect(KILL cr);
10918 
10919   ins_cost(275);
10920   format %{ "ucomisd $src1, $src2\n\t"
10921             "movl    $dst, #-1\n\t"
10922             "jp,s    done\n\t"
10923             "jb,s    done\n\t"
10924             "setne   $dst\n\t"
10925             "movzbl  $dst, $dst\n"
10926     "done:" %}
10927   ins_encode %{
10928     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10929     emit_cmpfp3(_masm, $dst$$Register);
10930   %}
10931   ins_pipe(pipe_slow);
10932 %}
10933 
10934 // Compare into -1,0,1
10935 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
        // CmpD3 against a double constant addressed through
        // $constantaddress($con), followed by emit_cmpfp3() as in cmpD_reg.
10936   match(Set dst (CmpD3 src con));
10937   effect(KILL cr);
10938 
10939   ins_cost(275);
10940   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10941             "movl    $dst, #-1\n\t"
10942             "jp,s    done\n\t"
10943             "jb,s    done\n\t"
10944             "setne   $dst\n\t"
10945             "movzbl  $dst, $dst\n"
10946     "done:" %}
10947   ins_encode %{
10948     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10949     emit_cmpfp3(_masm, $dst$$Register);
10950   %}
10951   ins_pipe(pipe_slow);
10952 %}
10953 
10954 //----------Arithmetic Conversion Instructions---------------------------------
10955 
10956 instruct convF2D_reg_reg(regD dst, regF src)
10957 %{
        // ConvF2D, register source: float to double via cvtss2sd.
        // No flags operand is declared for this instruct.
10958   match(Set dst (ConvF2D src));
10959 
10960   format %{ "cvtss2sd $dst, $src" %}
10961   ins_encode %{
10962     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10963   %}
10964   ins_pipe(pipe_slow); // XXX
10965 %}
10966 
10967 instruct convF2D_reg_mem(regD dst, memory src)
10968 %{
        // ConvF2D with the LoadF matched into cvtss2sd's memory operand.
10969   match(Set dst (ConvF2D (LoadF src)));
10970 
10971   format %{ "cvtss2sd $dst, $src" %}
10972   ins_encode %{
10973     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
10974   %}
10975   ins_pipe(pipe_slow); // XXX
10976 %}
10977 
10978 instruct convD2F_reg_reg(regF dst, regD src)
10979 %{
        // ConvD2F, register source: double to float via cvtsd2ss.
10980   match(Set dst (ConvD2F src));
10981 
10982   format %{ "cvtsd2ss $dst, $src" %}
10983   ins_encode %{
10984     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10985   %}
10986   ins_pipe(pipe_slow); // XXX
10987 %}
10988 
10989 instruct convD2F_reg_mem(regF dst, memory src)
10990 %{
        // ConvD2F with the LoadD matched into cvtsd2ss's memory operand.
10991   match(Set dst (ConvD2F (LoadD src)));
10992 
10993   format %{ "cvtsd2ss $dst, $src" %}
10994   ins_encode %{
10995     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
10996   %}
10997   ins_pipe(pipe_slow); // XXX
10998 %}
10999 
11000 // XXX do mem variants
11001 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11002 %{
        // ConvF2I: delegates to the assembler helper convertF2I with
        // (T_INT, T_FLOAT); the helper itself is defined outside this file
        // section. The flags register is declared killed.
11003   match(Set dst (ConvF2I src));
11004   effect(KILL cr);
11005   format %{ "convert_f2i $dst, $src" %}
11006   ins_encode %{
11007     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
11008   %}
11009   ins_pipe(pipe_slow);
11010 %}
11011 
11012 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11013 %{
        // ConvF2L: same convertF2I helper, called with (T_LONG, T_FLOAT).
11014   match(Set dst (ConvF2L src));
11015   effect(KILL cr);
11016   format %{ "convert_f2l $dst, $src"%}
11017   ins_encode %{
11018     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
11019   %}
11020   ins_pipe(pipe_slow);
11021 %}
11022 
11023 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11024 %{
        // ConvD2I: same convertF2I helper, called with (T_INT, T_DOUBLE).
11025   match(Set dst (ConvD2I src));
11026   effect(KILL cr);
11027   format %{ "convert_d2i $dst, $src"%}
11028   ins_encode %{
11029     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
11030   %}
11031   ins_pipe(pipe_slow);
11032 %}
11033 
11034 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11035 %{
        // ConvD2L: same convertF2I helper, called with (T_LONG, T_DOUBLE).
11036   match(Set dst (ConvD2L src));
11037   effect(KILL cr);
11038   format %{ "convert_d2l $dst, $src"%}
11039   ins_encode %{
11040     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
11041   %}
11042   ins_pipe(pipe_slow);
11043 %}
11044 
11045 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
11046 %{
        // RoundD: delegates to the assembler helper round_double. dst, rtmp
        // and rcx are all declared TEMP and the flags register is killed;
        // both scratch registers are passed to the helper.
11047   match(Set dst (RoundD src));
11048   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
11049   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
11050   ins_encode %{
11051     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
11052   %}
11053   ins_pipe(pipe_slow);
11054 %}
11055 
11056 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
11057 %{
        // RoundF: delegates to the assembler helper round_float. dst, rtmp
        // and rcx are all declared TEMP and the flags register is killed.
        // Unlike round_double_reg, the format text does not list the temps.
11058   match(Set dst (RoundF src));
11059   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
11060   format %{ "round_float $dst,$src" %}
11061   ins_encode %{
11062     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
11063   %}
11064   ins_pipe(pipe_slow);
11065 %}
11066 
11067 instruct convI2F_reg_reg(regF dst, rRegI src)
11068 %{
        // ConvI2F from a general register via cvtsi2ssl. Only selectable when
        // UseXmmI2F is off; convXI2F_reg carries the opposite predicate.
11069   predicate(!UseXmmI2F);
11070   match(Set dst (ConvI2F src));
11071 
11072   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11073   ins_encode %{
11074     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11075   %}
11076   ins_pipe(pipe_slow); // XXX
11077 %}
11078 
11079 instruct convI2F_reg_mem(regF dst, memory src)
11080 %{
        // ConvI2F with the LoadI matched into cvtsi2ssl's memory operand.
        // This form has no predicate, so it does not depend on UseXmmI2F.
11081   match(Set dst (ConvI2F (LoadI src)));
11082 
11083   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11084   ins_encode %{
11085     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
11086   %}
11087   ins_pipe(pipe_slow); // XXX
11088 %}
11089 
11090 instruct convI2D_reg_reg(regD dst, rRegI src)
11091 %{
        // ConvI2D from a general register via cvtsi2sdl. Only selectable when
        // UseXmmI2D is off; convXI2D_reg carries the opposite predicate.
11092   predicate(!UseXmmI2D);
11093   match(Set dst (ConvI2D src));
11094 
11095   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11096   ins_encode %{
11097     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11098   %}
11099   ins_pipe(pipe_slow); // XXX
11100 %}
11101 
11102 instruct convI2D_reg_mem(regD dst, memory src)
11103 %{
        // ConvI2D with the LoadI matched into cvtsi2sdl's memory operand.
        // This form has no predicate, so it does not depend on UseXmmI2D.
11104   match(Set dst (ConvI2D (LoadI src)));
11105 
11106   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11107   ins_encode %{
11108     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
11109   %}
11110   ins_pipe(pipe_slow); // XXX
11111 %}
11112 
11113 instruct convXI2F_reg(regF dst, rRegI src)
11114 %{
        // ConvI2F variant used when UseXmmI2F is on: the int is first moved
        // into the XMM destination with movdl and then converted in place
        // with cvtdq2ps.
11115   predicate(UseXmmI2F);
11116   match(Set dst (ConvI2F src));
11117 
11118   format %{ "movdl $dst, $src\n\t"
11119             "cvtdq2psl $dst, $dst\t# i2f" %}
11120   ins_encode %{
11121     __ movdl($dst$$XMMRegister, $src$$Register);
11122     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11123   %}
11124   ins_pipe(pipe_slow); // XXX
11125 %}
11126 
11127 instruct convXI2D_reg(regD dst, rRegI src)
11128 %{
        // ConvI2D variant used when UseXmmI2D is on: the int is first moved
        // into the XMM destination with movdl and then converted in place
        // with cvtdq2pd.
11129   predicate(UseXmmI2D);
11130   match(Set dst (ConvI2D src));
11131 
11132   format %{ "movdl $dst, $src\n\t"
11133             "cvtdq2pdl $dst, $dst\t# i2d" %}
11134   ins_encode %{
11135     __ movdl($dst$$XMMRegister, $src$$Register);
11136     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11137   %}
11138   ins_pipe(pipe_slow); // XXX
11139 %}
11140 
11141 instruct convL2F_reg_reg(regF dst, rRegL src)
11142 %{
        // ConvL2F from a general register via cvtsi2ssq.
11143   match(Set dst (ConvL2F src));
11144 
11145   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11146   ins_encode %{
11147     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
11148   %}
11149   ins_pipe(pipe_slow); // XXX
11150 %}
11151 
11152 instruct convL2F_reg_mem(regF dst, memory src)
11153 %{
        // ConvL2F with the LoadL matched into cvtsi2ssq's memory operand.
11154   match(Set dst (ConvL2F (LoadL src)));
11155 
11156   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11157   ins_encode %{
11158     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
11159   %}
11160   ins_pipe(pipe_slow); // XXX
11161 %}
11162 
11163 instruct convL2D_reg_reg(regD dst, rRegL src)
11164 %{
        // ConvL2D from a general register via cvtsi2sdq.
11165   match(Set dst (ConvL2D src));
11166 
11167   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11168   ins_encode %{
11169     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
11170   %}
11171   ins_pipe(pipe_slow); // XXX
11172 %}
11173 
11174 instruct convL2D_reg_mem(regD dst, memory src)
11175 %{
        // ConvL2D with the LoadL matched into cvtsi2sdq's memory operand.
11176   match(Set dst (ConvL2D (LoadL src)));
11177 
11178   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11179   ins_encode %{
11180     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
11181   %}
11182   ins_pipe(pipe_slow); // XXX
11183 %}
11184 
11185 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11186 %{
        // ConvI2L: int to long via movslq. Cost 125 is declared explicitly;
        // the zero-extending forms below declare no ins_cost.
11187   match(Set dst (ConvI2L src));
11188 
11189   ins_cost(125);
11190   format %{ "movslq  $dst, $src\t# i2l" %}
11191   ins_encode %{
11192     __ movslq($dst$$Register, $src$$Register);
11193   %}
11194   ins_pipe(ialu_reg_reg);
11195 %}
11196 
11197 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11198 // %{
11199 //   match(Set dst (ConvI2L src));
11200 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11201 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11202 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11203 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11204 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11205 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11206 
11207 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11208 //   ins_encode(enc_copy(dst, src));
11209 // //   opcode(0x63); // needs REX.W
11210 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11211 //   ins_pipe(ialu_reg_reg);
11212 // %}
11213 
11214 // Zero-extend convert int to long
11215 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11216 %{
        // Matches (ConvI2L src) & mask with an immL_32bits mask and replaces
        // the pair with one 32-bit movl. The format text no longer ends in a
        // dangling "\n\t", which printed an empty continuation line.
11217   match(Set dst (AndL (ConvI2L src) mask));
11218 
11219   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
11220   ins_encode %{
          // The move is omitted when dst and src were assigned the same
          // register; the format text above is printed either way.
11221     if ($dst$$reg != $src$$reg) {
11222       __ movl($dst$$Register, $src$$Register);
11223     }
11224   %}
11225   ins_pipe(ialu_reg_reg);
11226 %}
11227 
11228 // Zero-extend convert int to long
11229 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11230 %{
        // Matches (ConvI2L (LoadI src)) & mask with an immL_32bits mask and
        // replaces the whole pattern with one 32-bit movl from memory. The
        // format text no longer ends in a dangling "\n\t", which printed an
        // empty continuation line.
11231   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11232 
11233   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
11234   ins_encode %{
11235     __ movl($dst$$Register, $src$$Address);
11236   %}
11237   ins_pipe(ialu_reg_mem);
11238 %}
11239 
11240 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11241 %{
        // AndL of a long with an immL_32bits mask, emitted as a 32-bit movl.
        // Unlike convI2L_reg_reg_zex, the move is emitted unconditionally,
        // even when dst and src are the same register.
11242   match(Set dst (AndL src mask));
11243 
11244   format %{ "movl    $dst, $src\t# zero-extend long" %}
11245   ins_encode %{
11246     __ movl($dst$$Register, $src$$Register);
11247   %}
11248   ins_pipe(ialu_reg_reg);
11249 %}
11250 
11251 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11252 %{
        // ConvL2I: long to int via a 32-bit movl of the source register.
11253   match(Set dst (ConvL2I src));
11254 
11255   format %{ "movl    $dst, $src\t# l2i" %}
11256   ins_encode %{
11257     __ movl($dst$$Register, $src$$Register);
11258   %}
11259   ins_pipe(ialu_reg_reg);
11260 %}
11261 
11262 
11263 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I with the float in a stack slot: a 32-bit movl loads it from
        // [rsp + slot displacement] into the general register.
11264   match(Set dst (MoveF2I src));
11265   effect(DEF dst, USE src);
11266 
11267   ins_cost(125);
11268   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11269   ins_encode %{
11270     __ movl($dst$$Register, Address(rsp, $src$$disp));
11271   %}
11272   ins_pipe(ialu_reg_mem);
11273 %}
11274 
11275 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
        // MoveI2F with the int in a stack slot: movflt loads it from
        // [rsp + slot displacement] into the XMM register.
11276   match(Set dst (MoveI2F src));
11277   effect(DEF dst, USE src);
11278 
11279   ins_cost(125);
11280   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11281   ins_encode %{
11282     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11283   %}
11284   ins_pipe(pipe_slow);
11285 %}
11286 
11287 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
        // MoveD2L with the double in a stack slot: movq loads it from
        // [rsp + slot displacement] into the general register.
11288   match(Set dst (MoveD2L src));
11289   effect(DEF dst, USE src);
11290 
11291   ins_cost(125);
11292   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11293   ins_encode %{
11294     __ movq($dst$$Register, Address(rsp, $src$$disp));
11295   %}
11296   ins_pipe(ialu_reg_mem);
11297 %}
11298 
11299 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
        // MoveL2D from a stack slot, selectable only when
        // UseXmmLoadAndClearUpper is off. The encoding is the same movdbl call
        // as MoveL2D_stack_reg; only the predicate and the mnemonic shown in
        // the format text (movlpd versus movsd) differ between the two.
11300   predicate(!UseXmmLoadAndClearUpper);
11301   match(Set dst (MoveL2D src));
11302   effect(DEF dst, USE src);
11303 
11304   ins_cost(125);
11305   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11306   ins_encode %{
11307     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11308   %}
11309   ins_pipe(pipe_slow);
11310 %}
11311 
11312 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
        // MoveL2D from a stack slot, selectable only when
        // UseXmmLoadAndClearUpper is on; movdbl loads from
        // [rsp + slot displacement] into the XMM register.
11313   predicate(UseXmmLoadAndClearUpper);
11314   match(Set dst (MoveL2D src));
11315   effect(DEF dst, USE src);
11316 
11317   ins_cost(125);
11318   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11319   ins_encode %{
11320     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11321   %}
11322   ins_pipe(pipe_slow);
11323 %}
11324 
11325 
11326 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
        // MoveF2I with the result in a stack slot: movflt stores the XMM
        // register to [rsp + slot displacement].
11327   match(Set dst (MoveF2I src));
11328   effect(DEF dst, USE src);
11329 
11330   ins_cost(95); // XXX
11331   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11332   ins_encode %{
11333     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11334   %}
11335   ins_pipe(pipe_slow);
11336 %}
11337 
11338 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F with the result in a stack slot: a 32-bit movl stores the
        // general register to [rsp + slot displacement].
11339   match(Set dst (MoveI2F src));
11340   effect(DEF dst, USE src);
11341 
11342   ins_cost(100);
11343   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11344   ins_encode %{
11345     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11346   %}
11347   ins_pipe( ialu_mem_reg );
11348 %}
11349 
11350 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
        // MoveD2L with the result in a stack slot: movdbl stores the XMM
        // register to [rsp + slot displacement]. The trailing tag in the
        // format text now names this instruct; it previously read
        // "MoveL2D_reg_stack", the name of the sibling instruct.
11351   match(Set dst (MoveD2L src));
11352   effect(DEF dst, USE src);
11353 
11354   ins_cost(95); // XXX
11355   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11356   ins_encode %{
11357     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11358   %}
11359   ins_pipe(pipe_slow);
11360 %}
11361 
11362 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
        // MoveL2D with the result in a stack slot: movq stores the general
        // register to [rsp + slot displacement].
11363   match(Set dst (MoveL2D src));
11364   effect(DEF dst, USE src);
11365 
11366   ins_cost(100);
11367   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11368   ins_encode %{
11369     __ movq(Address(rsp, $dst$$disp), $src$$Register);
11370   %}
11371   ins_pipe(ialu_mem_reg);
11372 %}
11373 
11374 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
        // MoveF2I directly between registers via movdl (XMM to general
        // register). Cost 85 is below both stack-slot forms (125 and 95).
11375   match(Set dst (MoveF2I src));
11376   effect(DEF dst, USE src);
11377   ins_cost(85);
11378   format %{ "movd    $dst,$src\t# MoveF2I" %}
11379   ins_encode %{
11380     __ movdl($dst$$Register, $src$$XMMRegister);
11381   %}
11382   ins_pipe( pipe_slow );
11383 %}
11384 
11385 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
        // MoveD2L directly between registers via movdq (XMM to general
        // register). Cost 85 is below both stack-slot forms (125 and 95).
        // NOTE(review): the format text prints "movd" while the encoding
        // calls movdq - confirm which mnemonic the listing should show.
11386   match(Set dst (MoveD2L src));
11387   effect(DEF dst, USE src);
11388   ins_cost(85);
11389   format %{ "movd    $dst,$src\t# MoveD2L" %}
11390   ins_encode %{
11391     __ movdq($dst$$Register, $src$$XMMRegister);
11392   %}
11393   ins_pipe( pipe_slow );
11394 %}
11395 
11396 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
        // MoveI2F directly between registers via movdl (general register to
        // XMM). Cost 100 equals MoveI2F_reg_stack and is below the 125 of
        // MoveI2F_stack_reg.
11397   match(Set dst (MoveI2F src));
11398   effect(DEF dst, USE src);
11399   ins_cost(100);
11400   format %{ "movd    $dst,$src\t# MoveI2F" %}
11401   ins_encode %{
11402     __ movdl($dst$$XMMRegister, $src$$Register);
11403   %}
11404   ins_pipe( pipe_slow );
11405 %}
11406 
11407 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
        // MoveL2D directly between registers via movdq (general register to
        // XMM). Cost 100 equals MoveL2D_reg_stack and is below the 125 of the
        // MoveL2D_stack_reg forms.
        // NOTE(review): the format text prints "movd" while the encoding
        // calls movdq - confirm which mnemonic the listing should show.
11408   match(Set dst (MoveL2D src));
11409   effect(DEF dst, USE src);
11410   ins_cost(100);
11411   format %{ "movd    $dst,$src\t# MoveL2D" %}
11412   ins_encode %{
11413      __ movdq($dst$$XMMRegister, $src$$Register);
11414   %}
11415   ins_pipe( pipe_slow );
11416 %}
11417 
11418 // Fast clearing of an array
11419 // Small ClearArray non-AVX512.
11420 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11421                   Universe dummy, rFlagsReg cr)
11422 %{
        // Selected for ClearArray nodes that are not is_large() when
        // UseAVX <= 2. cnt and base are consumed and clobbered (USE_KILL),
        // tmp is a scratch XMM register, and zero plus the flags are killed.
        // The format template below is listing text only: it prints a
        // different description depending on UseFastStosb / UseXMMForObjInit.
        // The code itself is produced by the single clear_mem() call, here
        // with `false` as its fifth argument and knoreg as the mask register.
11423   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11424   match(Set dummy (ClearArray cnt base));
11425   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11426 
11427   format %{ $$template
11428     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11429     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11430     $$emit$$"jg      LARGE\n\t"
11431     $$emit$$"dec     rcx\n\t"
11432     $$emit$$"js      DONE\t# Zero length\n\t"
11433     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11434     $$emit$$"dec     rcx\n\t"
11435     $$emit$$"jge     LOOP\n\t"
11436     $$emit$$"jmp     DONE\n\t"
11437     $$emit$$"# LARGE:\n\t"
11438     if (UseFastStosb) {
11439        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11440        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11441     } else if (UseXMMForObjInit) {
11442        $$emit$$"mov     rdi,rax\n\t"
11443        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11444        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11445        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11446        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11447        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11448        $$emit$$"add     0x40,rax\n\t"
11449        $$emit$$"# L_zero_64_bytes:\n\t"
11450        $$emit$$"sub     0x8,rcx\n\t"
11451        $$emit$$"jge     L_loop\n\t"
11452        $$emit$$"add     0x4,rcx\n\t"
11453        $$emit$$"jl      L_tail\n\t"
11454        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11455        $$emit$$"add     0x20,rax\n\t"
11456        $$emit$$"sub     0x4,rcx\n\t"
11457        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11458        $$emit$$"add     0x4,rcx\n\t"
11459        $$emit$$"jle     L_end\n\t"
11460        $$emit$$"dec     rcx\n\t"
11461        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11462        $$emit$$"vmovq   xmm0,(rax)\n\t"
11463        $$emit$$"add     0x8,rax\n\t"
11464        $$emit$$"dec     rcx\n\t"
11465        $$emit$$"jge     L_sloop\n\t"
11466        $$emit$$"# L_end:\n\t"
11467     } else {
11468        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11469     }
11470     $$emit$$"# DONE"
11471   %}
11472   ins_encode %{
11473     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11474                  $tmp$$XMMRegister, false, knoreg);
11475   %}
11476   ins_pipe(pipe_slow);
11477 %}
11478 
11479 // Small ClearArray AVX512 non-constant length.
11480 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11481                        Universe dummy, rFlagsReg cr)
11482 %{
        // Selected for ClearArray nodes that are not is_large() when
        // UseAVX > 2. Differs from rep_stos in taking a legRegD scratch
        // register and an extra kReg temp, which is passed to clear_mem() in
        // place of knoreg. The format template is listing text only.
11483   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11484   match(Set dummy (ClearArray cnt base));
11485   ins_cost(125);
11486   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11487 
11488   format %{ $$template
11489     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11490     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11491     $$emit$$"jg      LARGE\n\t"
11492     $$emit$$"dec     rcx\n\t"
11493     $$emit$$"js      DONE\t# Zero length\n\t"
11494     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11495     $$emit$$"dec     rcx\n\t"
11496     $$emit$$"jge     LOOP\n\t"
11497     $$emit$$"jmp     DONE\n\t"
11498     $$emit$$"# LARGE:\n\t"
11499     if (UseFastStosb) {
11500        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11501        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11502     } else if (UseXMMForObjInit) {
11503        $$emit$$"mov     rdi,rax\n\t"
11504        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11505        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11506        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11507        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11508        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11509        $$emit$$"add     0x40,rax\n\t"
11510        $$emit$$"# L_zero_64_bytes:\n\t"
11511        $$emit$$"sub     0x8,rcx\n\t"
11512        $$emit$$"jge     L_loop\n\t"
11513        $$emit$$"add     0x4,rcx\n\t"
11514        $$emit$$"jl      L_tail\n\t"
11515        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11516        $$emit$$"add     0x20,rax\n\t"
11517        $$emit$$"sub     0x4,rcx\n\t"
11518        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11519        $$emit$$"add     0x4,rcx\n\t"
11520        $$emit$$"jle     L_end\n\t"
11521        $$emit$$"dec     rcx\n\t"
11522        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11523        $$emit$$"vmovq   xmm0,(rax)\n\t"
11524        $$emit$$"add     0x8,rax\n\t"
11525        $$emit$$"dec     rcx\n\t"
11526        $$emit$$"jge     L_sloop\n\t"
11527        $$emit$$"# L_end:\n\t"
11528     } else {
11529        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11530     }
11531     $$emit$$"# DONE"
11532   %}
11533   ins_encode %{
11534     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11535                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11536   %}
11537   ins_pipe(pipe_slow);
11538 %}
11539 
11540 // Large ClearArray non-AVX512.
11541 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11542                         Universe dummy, rFlagsReg cr)
11543 %{
        // Selected for ClearArray nodes that are is_large() when UseAVX <= 2.
        // Same operands and effects as rep_stos, but clear_mem() is called
        // with `true` as its fifth argument, and the format template omits
        // the InitArrayShortSize short-loop prologue printed by rep_stos.
11544   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
11545   match(Set dummy (ClearArray cnt base));
11546   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11547 
11548   format %{ $$template
11549     if (UseFastStosb) {
11550        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11551        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11552        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11553     } else if (UseXMMForObjInit) {
11554        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11555        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11556        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11557        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11558        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11559        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11560        $$emit$$"add     0x40,rax\n\t"
11561        $$emit$$"# L_zero_64_bytes:\n\t"
11562        $$emit$$"sub     0x8,rcx\n\t"
11563        $$emit$$"jge     L_loop\n\t"
11564        $$emit$$"add     0x4,rcx\n\t"
11565        $$emit$$"jl      L_tail\n\t"
11566        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11567        $$emit$$"add     0x20,rax\n\t"
11568        $$emit$$"sub     0x4,rcx\n\t"
11569        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11570        $$emit$$"add     0x4,rcx\n\t"
11571        $$emit$$"jle     L_end\n\t"
11572        $$emit$$"dec     rcx\n\t"
11573        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11574        $$emit$$"vmovq   xmm0,(rax)\n\t"
11575        $$emit$$"add     0x8,rax\n\t"
11576        $$emit$$"dec     rcx\n\t"
11577        $$emit$$"jge     L_sloop\n\t"
11578        $$emit$$"# L_end:\n\t"
11579     } else {
11580        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11581        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11582     }
11583   %}
11584   ins_encode %{
11585     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11586                  $tmp$$XMMRegister, true, knoreg);
11587   %}
11588   ins_pipe(pipe_slow);
11589 %}
11590 
11591 // Large ClearArray AVX512.
11592 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11593                              Universe dummy, rFlagsReg cr)
11594 %{
        // Selected for ClearArray nodes that are is_large() when UseAVX > 2.
        // Differs from rep_stos_large in taking a legRegD scratch register and
        // an extra kReg temp, which is passed to clear_mem() in place of
        // knoreg. The format template is listing text only.
11595   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11596   match(Set dummy (ClearArray cnt base));
11597   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11598 
11599   format %{ $$template
11600     if (UseFastStosb) {
11601        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11602        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11603        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11604     } else if (UseXMMForObjInit) {
11605        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11606        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11607        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11608        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11609        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11610        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11611        $$emit$$"add     0x40,rax\n\t"
11612        $$emit$$"# L_zero_64_bytes:\n\t"
11613        $$emit$$"sub     0x8,rcx\n\t"
11614        $$emit$$"jge     L_loop\n\t"
11615        $$emit$$"add     0x4,rcx\n\t"
11616        $$emit$$"jl      L_tail\n\t"
11617        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11618        $$emit$$"add     0x20,rax\n\t"
11619        $$emit$$"sub     0x4,rcx\n\t"
11620        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11621        $$emit$$"add     0x4,rcx\n\t"
11622        $$emit$$"jle     L_end\n\t"
11623        $$emit$$"dec     rcx\n\t"
11624        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11625        $$emit$$"vmovq   xmm0,(rax)\n\t"
11626        $$emit$$"add     0x8,rax\n\t"
11627        $$emit$$"dec     rcx\n\t"
11628        $$emit$$"jge     L_sloop\n\t"
11629        $$emit$$"# L_end:\n\t"
11630     } else {
11631        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11632        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11633     }
11634   %}
11635   ins_encode %{
11636     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11637                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11638   %}
11639   ins_pipe(pipe_slow);
11640 %}
11641 
// Small ClearArray with a constant (immL) length, AVX-512 flavour.
// Chosen for ClearArray nodes that are not is_large() when UseAVX > 2 and
// VM_Version::supports_avx512vlbw() holds.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp,
                     Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() &&
              ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    zero_reg = $zero$$Register;
    XMMRegister vec_tmp  = $tmp$$XMMRegister;
    KRegister   mask_tmp = $ktmp$$KRegister;
    // The length is passed as an immediate, not in a register.
    __ clear_mem(base_reg, $cnt$$constant, zero_reg, vec_tmp, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
11656 
// StrComp with LL encoding, used when VM_Version::supports_avx512vlbw() is
// false. Both string pointers and both counts are consumed and clobbered.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1,
                         rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    first      = $str1$$Register;
    Register    second     = $str2$$Register;
    Register    first_len  = $cnt1$$Register;
    Register    second_len = $cnt2$$Register;
    Register    outcome    = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    // This variant reserves no opmask temporary, so knoreg is passed.
    __ string_compare(first, second, first_len, second_len, outcome,
                      vec_tmp, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11672 
// StrComp with LL encoding, used when VM_Version::supports_avx512vlbw() is
// true. Same operand layout as the non-EVEX rule plus an opmask temporary.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1,
                              rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp,
                              rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    first      = $str1$$Register;
    Register    second     = $str2$$Register;
    Register    first_len  = $cnt1$$Register;
    Register    second_len = $cnt2$$Register;
    Register    outcome    = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first, second, first_len, second_len, outcome,
                      vec_tmp, StrIntrinsicNode::LL, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11688 
// StrComp with UU encoding, used when VM_Version::supports_avx512vlbw() is
// false. Both string pointers and both counts are consumed and clobbered.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1,
                         rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    first      = $str1$$Register;
    Register    second     = $str2$$Register;
    Register    first_len  = $cnt1$$Register;
    Register    second_len = $cnt2$$Register;
    Register    outcome    = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    // This variant reserves no opmask temporary, so knoreg is passed.
    __ string_compare(first, second, first_len, second_len, outcome,
                      vec_tmp, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11704 
// StrComp with UU encoding, used when VM_Version::supports_avx512vlbw() is
// true. Same operand layout as the non-EVEX rule plus an opmask temporary.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1,
                              rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp,
                              rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    first      = $str1$$Register;
    Register    second     = $str2$$Register;
    Register    first_len  = $cnt1$$Register;
    Register    second_len = $cnt2$$Register;
    Register    outcome    = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first, second, first_len, second_len, outcome,
                      vec_tmp, StrIntrinsicNode::UU, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11720 
// StrComp with LU encoding, used when VM_Version::supports_avx512vlbw() is
// false. Both string pointers and both counts are consumed and clobbered.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1,
                          rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    first      = $str1$$Register;
    Register    second     = $str2$$Register;
    Register    first_len  = $cnt1$$Register;
    Register    second_len = $cnt2$$Register;
    Register    outcome    = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    // This variant reserves no opmask temporary, so knoreg is passed.
    __ string_compare(first, second, first_len, second_len, outcome,
                      vec_tmp, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11736 
// StrComp with LU encoding, used when VM_Version::supports_avx512vlbw() is
// true. Same operand layout as the non-EVEX rule plus an opmask temporary.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1,
                               rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp,
                               rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    first      = $str1$$Register;
    Register    second     = $str2$$Register;
    Register    first_len  = $cnt1$$Register;
    Register    second_len = $cnt2$$Register;
    Register    outcome    = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first, second, first_len, second_len, outcome,
                      vec_tmp, StrIntrinsicNode::LU, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11752 
// StrComp with UL encoding, used when VM_Version::supports_avx512vlbw() is
// false. Unlike the other StrComp rules, str1/cnt1 are bound to the
// rsi/rdx operand classes and str2/cnt2 to rdi/rcx, and the encoder hands
// the second string to string_compare first.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1,
                          rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    str1_reg = $str1$$Register;
    Register    cnt1_reg = $cnt1$$Register;
    Register    str2_reg = $str2$$Register;
    Register    cnt2_reg = $cnt2$$Register;
    Register    outcome  = $result$$Register;
    XMMRegister vec_tmp  = $tmp1$$XMMRegister;
    // Arguments are deliberately passed in swapped order: str2/cnt2 first.
    __ string_compare(str2_reg, str1_reg, cnt2_reg, cnt1_reg, outcome,
                      vec_tmp, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11768 
// StrComp with UL encoding, used when VM_Version::supports_avx512vlbw() is
// true. str1/cnt1 are bound to the rsi/rdx operand classes and str2/cnt2 to
// rdi/rcx; the encoder hands the second string to string_compare first.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1,
                               rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp,
                               rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    str1_reg = $str1$$Register;
    Register    cnt1_reg = $cnt1$$Register;
    Register    str2_reg = $str2$$Register;
    Register    cnt2_reg = $cnt2$$Register;
    Register    outcome  = $result$$Register;
    XMMRegister vec_tmp  = $tmp1$$XMMRegister;
    KRegister   mask_tmp = $ktmp$$KRegister;
    // Arguments are deliberately passed in swapped order: str2/cnt2 first.
    __ string_compare(str2_reg, str1_reg, cnt2_reg, cnt1_reg, outcome,
                      vec_tmp, StrIntrinsicNode::UL, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11784 
// StrIndexOf with LL encoding where the substring length is an immediate.
// Requires UseSSE42Intrinsics. The cnt2 operand is not an input of the match
// rule; it is only listed so that its register can be killed and handed to
// the assembler routine.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2,
                             immI int_cnt2, rbx_RegI result, legRegD tmp_vec,
                             rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int   needle_len   = (int)$int_cnt2$$constant;
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_cnt   = $cnt2$$Register;
    Register    found_at     = $result$$Register;
    XMMRegister vec_tmp      = $tmp_vec$$XMMRegister;
    Register    gpr_tmp      = $tmp$$Register;
    if (needle_len < 16) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(haystack, needle, haystack_len, needle_cnt,
                        needle_len, found_at, vec_tmp, gpr_tmp,
                        StrIntrinsicNode::LL);
    } else {
      // Constant substrings of 16 or more elements
      // do not need to be loaded through stack.
      __ string_indexofC8(haystack, needle, haystack_len, needle_cnt,
                          needle_len, found_at, vec_tmp, gpr_tmp,
                          StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11813 
// StrIndexOf with UU encoding where the substring length is an immediate.
// Requires UseSSE42Intrinsics. The cnt2 operand is not an input of the match
// rule; it is only listed so that its register can be killed and handed to
// the assembler routine.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2,
                             immI int_cnt2, rbx_RegI result, legRegD tmp_vec,
                             rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int   needle_len   = (int)$int_cnt2$$constant;
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_cnt   = $cnt2$$Register;
    Register    found_at     = $result$$Register;
    XMMRegister vec_tmp      = $tmp_vec$$XMMRegister;
    Register    gpr_tmp      = $tmp$$Register;
    if (needle_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(haystack, needle, haystack_len, needle_cnt,
                        needle_len, found_at, vec_tmp, gpr_tmp,
                        StrIntrinsicNode::UU);
    } else {
      // Constant substrings of 8 or more elements
      // do not need to be loaded through stack.
      __ string_indexofC8(haystack, needle, haystack_len, needle_cnt,
                          needle_len, found_at, vec_tmp, gpr_tmp,
                          StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
11842 
// StrIndexOf with UL encoding where the substring length is an immediate.
// Requires UseSSE42Intrinsics. The cnt2 operand is not an input of the match
// rule; it is only listed so that its register can be killed and handed to
// the assembler routine.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2,
                              immI int_cnt2, rbx_RegI result, legRegD tmp_vec,
                              rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int   needle_len   = (int)$int_cnt2$$constant;
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_cnt   = $cnt2$$Register;
    Register    found_at     = $result$$Register;
    XMMRegister vec_tmp      = $tmp_vec$$XMMRegister;
    Register    gpr_tmp      = $tmp$$Register;
    if (needle_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(haystack, needle, haystack_len, needle_cnt,
                        needle_len, found_at, vec_tmp, gpr_tmp,
                        StrIntrinsicNode::UL);
    } else {
      // Constant substrings of 8 or more elements
      // do not need to be loaded through stack.
      __ string_indexofC8(haystack, needle, haystack_len, needle_cnt,
                          needle_len, found_at, vec_tmp, gpr_tmp,
                          StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11871 
// StrIndexOf with LL encoding and a substring length held in a register.
// Requires UseSSE42Intrinsics. All four inputs are consumed and clobbered.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1,
                         rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp,
                         rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_len   = $cnt2$$Register;
    Register    found_at     = $result$$Register;
    XMMRegister vec_tmp      = $tmp_vec$$XMMRegister;
    Register    gpr_tmp      = $tmp$$Register;
    // -1 is passed where the constant-length rules pass the immediate count.
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), found_at, vec_tmp, gpr_tmp, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11888 
// StrIndexOf with UU encoding and a substring length held in a register.
// Requires UseSSE42Intrinsics. All four inputs are consumed and clobbered.
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1,
                         rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp,
                         rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_len   = $cnt2$$Register;
    Register    found_at     = $result$$Register;
    XMMRegister vec_tmp      = $tmp_vec$$XMMRegister;
    Register    gpr_tmp      = $tmp$$Register;
    // -1 is passed where the constant-length rules pass the immediate count.
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), found_at, vec_tmp, gpr_tmp, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
11905 
// StrIndexOf with UL encoding and a substring length held in a register.
// Requires UseSSE42Intrinsics. All four inputs are consumed and clobbered.
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1,
                          rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp,
                          rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_len   = $cnt2$$Register;
    Register    found_at     = $result$$Register;
    XMMRegister vec_tmp      = $tmp_vec$$XMMRegister;
    Register    gpr_tmp      = $tmp$$Register;
    // -1 is passed where the constant-length rules pass the immediate count.
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), found_at, vec_tmp, gpr_tmp, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
11922 
// StrIndexOfChar for the U encoding. Requires UseSSE42Intrinsics.
// The string pointer, its count and the searched char are consumed and
// clobbered; three vector registers and one GPR serve as temporaries.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result,
                             legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3,
                             rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3,
         USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    Register    text     = $str1$$Register;
    Register    text_len = $cnt1$$Register;
    Register    wanted   = $ch$$Register;
    Register    found_at = $result$$Register;
    XMMRegister vec_a    = $tmp_vec1$$XMMRegister;
    XMMRegister vec_b    = $tmp_vec2$$XMMRegister;
    XMMRegister vec_c    = $tmp_vec3$$XMMRegister;
    Register    gpr_tmp  = $tmp$$Register;
    __ string_indexof_char(text, text_len, wanted, found_at,
                           vec_a, vec_b, vec_c, gpr_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11936 
// StrIndexOfChar for the L encoding. Requires UseSSE42Intrinsics.
// The string pointer, its count and the searched char are consumed and
// clobbered; three vector registers and one GPR serve as temporaries.
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result,
                              legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3,
                              rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3,
         USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    Register    text     = $str1$$Register;
    Register    text_len = $cnt1$$Register;
    Register    wanted   = $ch$$Register;
    Register    found_at = $result$$Register;
    XMMRegister vec_a    = $tmp_vec1$$XMMRegister;
    XMMRegister vec_b    = $tmp_vec2$$XMMRegister;
    XMMRegister vec_c    = $tmp_vec3$$XMMRegister;
    Register    gpr_tmp  = $tmp$$Register;
    __ stringL_indexof_char(text, text_len, wanted, found_at,
                            vec_a, vec_b, vec_c, gpr_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11950 
// StrEquals for CPUs where VM_Version::supports_avx512vlbw() is false.
// Both string pointers and the count are consumed and clobbered; tmp3 is
// killed and two vector registers are reserved as temporaries.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt,
                       rax_RegI result, legRegD tmp1, legRegD tmp2,
                       rbx_RegI tmp3, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt,
         KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    Register    lhs     = $str1$$Register;
    Register    rhs     = $str2$$Register;
    Register    length  = $cnt$$Register;
    Register    outcome = $result$$Register;
    Register    gpr_tmp = $tmp3$$Register;
    XMMRegister vec_a   = $tmp1$$XMMRegister;
    XMMRegister vec_b   = $tmp2$$XMMRegister;
    // NOTE(review): the leading false presumably means "not an array
    // comparison"; confirm against the arrays_equals declaration.
    __ arrays_equals(false, lhs, rhs, length, outcome, gpr_tmp,
                     vec_a, vec_b, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11967 
// StrEquals for CPUs where VM_Version::supports_avx512vlbw() is true.
// Same operand layout as the non-EVEX rule plus an opmask temporary.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt,
                            rax_RegI result, legRegD tmp1, legRegD tmp2,
                            kReg ktmp, rbx_RegI tmp3, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt,
         KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    Register    lhs      = $str1$$Register;
    Register    rhs      = $str2$$Register;
    Register    length   = $cnt$$Register;
    Register    outcome  = $result$$Register;
    Register    gpr_tmp  = $tmp3$$Register;
    XMMRegister vec_a    = $tmp1$$XMMRegister;
    XMMRegister vec_b    = $tmp2$$XMMRegister;
    KRegister   mask_tmp = $ktmp$$KRegister;
    // NOTE(review): the leading false presumably means "not an array
    // comparison"; confirm against the arrays_equals declaration.
    __ arrays_equals(false, lhs, rhs, length, outcome, gpr_tmp,
                     vec_a, vec_b, false /* char */, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11983 
// AryEq with LL encoding (byte[] per the format string), used when
// VM_Version::supports_avx512vlbw() is false. Both array pointers are
// consumed and clobbered; tmp3 and tmp4 are killed.
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2,
                       rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2,
         USE_KILL ary1, USE_KILL ary2,
         KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register    lhs       = $ary1$$Register;
    Register    rhs       = $ary2$$Register;
    Register    gpr_tmp_a = $tmp3$$Register;
    Register    outcome   = $result$$Register;
    Register    gpr_tmp_b = $tmp4$$Register;
    XMMRegister vec_a     = $tmp1$$XMMRegister;
    XMMRegister vec_b     = $tmp2$$XMMRegister;
    // tmp3 sits in the slot where the StrEquals rules pass the count.
    __ arrays_equals(true, lhs, rhs, gpr_tmp_a, outcome, gpr_tmp_b,
                     vec_a, vec_b, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12000 
// AryEq with LL encoding (byte[] per the format string), used when
// VM_Version::supports_avx512vlbw() is true. Same operand layout as the
// non-EVEX rule plus an opmask temporary.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp,
                            rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp,
         USE_KILL ary1, USE_KILL ary2,
         KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register    lhs       = $ary1$$Register;
    Register    rhs       = $ary2$$Register;
    Register    gpr_tmp_a = $tmp3$$Register;
    Register    outcome   = $result$$Register;
    Register    gpr_tmp_b = $tmp4$$Register;
    XMMRegister vec_a     = $tmp1$$XMMRegister;
    XMMRegister vec_b     = $tmp2$$XMMRegister;
    KRegister   mask_tmp  = $ktmp$$KRegister;
    // tmp3 sits in the slot where the StrEquals rules pass the count.
    __ arrays_equals(true, lhs, rhs, gpr_tmp_a, outcome, gpr_tmp_b,
                     vec_a, vec_b, false /* char */, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
12016 
// AryEq with UU encoding (char[] per the format string), used when
// VM_Version::supports_avx512vlbw() is false. Both array pointers are
// consumed and clobbered; tmp3 and tmp4 are killed.
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2,
                       rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2,
         USE_KILL ary1, USE_KILL ary2,
         KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register    lhs       = $ary1$$Register;
    Register    rhs       = $ary2$$Register;
    Register    gpr_tmp_a = $tmp3$$Register;
    Register    outcome   = $result$$Register;
    Register    gpr_tmp_b = $tmp4$$Register;
    XMMRegister vec_a     = $tmp1$$XMMRegister;
    XMMRegister vec_b     = $tmp2$$XMMRegister;
    // Differs from the byte[] rule only in the char flag being true.
    __ arrays_equals(true, lhs, rhs, gpr_tmp_a, outcome, gpr_tmp_b,
                     vec_a, vec_b, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12032 
// AryEq with UU encoding (char[] per the format string), used when
// VM_Version::supports_avx512vlbw() is true. Same operand layout as the
// non-EVEX rule plus an opmask temporary.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp,
                            rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp,
         USE_KILL ary1, USE_KILL ary2,
         KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register    lhs       = $ary1$$Register;
    Register    rhs       = $ary2$$Register;
    Register    gpr_tmp_a = $tmp3$$Register;
    Register    outcome   = $result$$Register;
    Register    gpr_tmp_b = $tmp4$$Register;
    XMMRegister vec_a     = $tmp1$$XMMRegister;
    XMMRegister vec_b     = $tmp2$$XMMRegister;
    KRegister   mask_tmp  = $ktmp$$KRegister;
    // Differs from the byte[] rule only in the char flag being true.
    __ arrays_equals(true, lhs, rhs, gpr_tmp_a, outcome, gpr_tmp_b,
                     vec_a, vec_b, true /* char */, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
12048 
// VectorizedHashCode, available when UseAVX >= 2.
// result is both an input of the match rule and its output. The array
// pointer and count are consumed and clobbered; thirteen vector registers
// and three GPRs are reserved as temporaries. basic_type is an immediate
// that is cast to BasicType for the assembler routine.
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3,
                         legRegD tmp_vec4, legRegD tmp_vec5, legRegD tmp_vec6,
                         legRegD tmp_vec7, legRegD tmp_vec8, legRegD tmp_vec9,
                         legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13,
                         rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr) %{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4,
         TEMP tmp_vec5, TEMP tmp_vec6, TEMP tmp_vec7, TEMP tmp_vec8,
         TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13,
         TEMP tmp1, TEMP tmp2, TEMP tmp3,
         USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
  ins_encode %{
    Register  array_reg = $ary1$$Register;
    Register  count_reg = $cnt1$$Register;
    Register  hash_reg  = $result$$Register;
    Register  gpr_tmp_a = $tmp1$$Register;
    Register  gpr_tmp_b = $tmp2$$Register;
    Register  gpr_tmp_c = $tmp3$$Register;
    BasicType elem_type = (BasicType)$basic_type$$constant;
    __ arrays_hashcode(array_reg, count_reg, hash_reg,
                       gpr_tmp_a, gpr_tmp_b, gpr_tmp_c,
                       $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister,
                       $tmp_vec3$$XMMRegister, $tmp_vec4$$XMMRegister,
                       $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister,
                       $tmp_vec9$$XMMRegister, $tmp_vec10$$XMMRegister,
                       $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister, elem_type);
  %}
  ins_pipe( pipe_slow );
%}
12074 
// CountPositives for CPUs where VM_Version::supports_avx512vlbw() or
// VM_Version::supports_bmi2() is false. The array pointer and length are
// consumed and clobbered; tmp3 is killed and two vector registers are
// reserved as temporaries. No opmask registers are used, so knoreg is
// passed for both mask arguments.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12090 
// CountPositives for CPUs where both VM_Version::supports_avx512vlbw() and
// VM_Version::supports_bmi2() are true. The array pointer and length are
// consumed and clobbered; tmp3 is killed; two vector registers and two
// opmask registers are reserved as temporaries.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12106 
// char[] to byte[] compression (StrCompressedCopy) for CPUs where
// VM_Version::supports_avx512vlbw() or VM_Version::supports_bmi2() is false.
// src, dst and len are consumed and clobbered; tmp5 is killed and four
// vector registers are reserved as temporaries.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                         legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                         rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    Register    from    = $src$$Register;
    Register    to      = $dst$$Register;
    Register    length  = $len$$Register;
    XMMRegister vec_a   = $tmp1$$XMMRegister;
    XMMRegister vec_b   = $tmp2$$XMMRegister;
    XMMRegister vec_c   = $tmp3$$XMMRegister;
    XMMRegister vec_d   = $tmp4$$XMMRegister;
    Register    gpr_tmp = $tmp5$$Register;
    Register    outcome = $result$$Register;
    // No opmask temporaries in this variant: knoreg for both mask slots.
    __ char_array_compress(from, to, length,
                           vec_a, vec_b, vec_c, vec_d,
                           gpr_tmp, outcome, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12124 
// char[] to byte[] compression (StrCompressedCopy) for CPUs where both
// VM_Version::supports_avx512vlbw() and VM_Version::supports_bmi2() are
// true. Same operand layout as the non-EVEX rule plus two opmask temporaries.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                              legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                              kReg ktmp1, kReg ktmp2,
                              rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    Register    from    = $src$$Register;
    Register    to      = $dst$$Register;
    Register    length  = $len$$Register;
    XMMRegister vec_a   = $tmp1$$XMMRegister;
    XMMRegister vec_b   = $tmp2$$XMMRegister;
    XMMRegister vec_c   = $tmp3$$XMMRegister;
    XMMRegister vec_d   = $tmp4$$XMMRegister;
    Register    gpr_tmp = $tmp5$$Register;
    Register    outcome = $result$$Register;
    KRegister   mask_a  = $ktmp1$$KRegister;
    KRegister   mask_b  = $ktmp2$$KRegister;
    __ char_array_compress(from, to, length,
                           vec_a, vec_b, vec_c, vec_d,
                           gpr_tmp, outcome, mask_a, mask_b);
  %}
  ins_pipe( pipe_slow );
%}
// byte[] to char[] inflation (StrInflatedCopy) for CPUs where
// VM_Version::supports_avx512vlbw() or VM_Version::supports_bmi2() is false.
// The rule produces no value (it sets the Universe dummy); src, dst and len
// are consumed and clobbered.
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    Register    from    = $src$$Register;
    Register    to      = $dst$$Register;
    Register    length  = $len$$Register;
    XMMRegister vec_tmp = $tmp1$$XMMRegister;
    Register    gpr_tmp = $tmp2$$Register;
    // No opmask temporary in this variant, so knoreg is passed.
    __ byte_array_inflate(from, to, length, vec_tmp, gpr_tmp, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12155 
// byte[] to char[] inflation (StrInflatedCopy) for CPUs where both
// VM_Version::supports_avx512vlbw() and VM_Version::supports_bmi2() are
// true. Same operand layout as the non-EVEX rule plus an opmask temporary.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    Register    from     = $src$$Register;
    Register    to       = $dst$$Register;
    Register    length   = $len$$Register;
    XMMRegister vec_tmp  = $tmp1$$XMMRegister;
    Register    gpr_tmp  = $tmp2$$Register;
    KRegister   mask_tmp = $ktmp$$KRegister;
    __ byte_array_inflate(from, to, length, vec_tmp, gpr_tmp, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
12169 
// EncodeISOArray (char[] to byte[], ISO_8859_1) for nodes whose is_ascii()
// is false. src, dst and len are consumed and clobbered; tmp5 is killed and
// four vector registers are reserved as temporaries.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    Register    from    = $src$$Register;
    Register    to      = $dst$$Register;
    Register    length  = $len$$Register;
    XMMRegister vec_a   = $tmp1$$XMMRegister;
    XMMRegister vec_b   = $tmp2$$XMMRegister;
    XMMRegister vec_c   = $tmp3$$XMMRegister;
    XMMRegister vec_d   = $tmp4$$XMMRegister;
    Register    gpr_tmp = $tmp5$$Register;
    Register    outcome = $result$$Register;
    // The trailing false mirrors the predicate: this node is not is_ascii().
    __ encode_iso_array(from, to, length,
                        vec_a, vec_b, vec_c, vec_d,
                        gpr_tmp, outcome, false);
  %}
  ins_pipe( pipe_slow );
%}
12186 
// Encode char[] to byte[] in ASCII.
// Applies when the EncodeISOArray node is marked ASCII; that choice is
// forwarded as the trailing 'true' argument of encode_iso_array().
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    const bool ascii = true;
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                        $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                        $tmp5$$Register, $result$$Register, ascii);
  %}
  ins_pipe( pipe_slow );
%}
12203 
12204 //----------Overflow Math Instructions-----------------------------------------
12205 
// Int add overflow check, register + register.  The flags written by addl
// are the result (DEF cr); the sum lands in $op1, hence USE_KILL.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register accum  = $op1$$Register;
    Register addend = $op2$$Register;
    __ addl(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
12218 
// Int add overflow check, register + immediate.  The flags written by addl
// are the result (DEF cr); the sum lands in $op1, hence USE_KILL.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register accum = $op1$$Register;
    __ addl(accum, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12231 
// Long add overflow check, register + register.  The flags written by addq
// are the result (DEF cr); the sum lands in $op1, hence USE_KILL.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register accum  = $op1$$Register;
    Register addend = $op2$$Register;
    __ addq(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
12243 
// Long add overflow check, register + 32-bit immediate.  The flags written
// by addq are the result (DEF cr); the sum lands in $op1, hence USE_KILL.
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register accum = $op1$$Register;
    __ addq(accum, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12255 
// Int subtract overflow check, register - register.  Encoded as cmpl, so
// only the flags are produced and neither operand is written.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register minuend    = $op1$$Register;
    Register subtrahend = $op2$$Register;
    __ cmpl(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
12266 
// Int subtract overflow check, register - immediate.  Encoded as cmpl, so
// only the flags are produced and $op1 is not written.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register minuend = $op1$$Register;
    __ cmpl(minuend, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12277 
// Long subtract overflow check, register - register.  Encoded as cmpq, so
// only the flags are produced and neither operand is written.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register minuend    = $op1$$Register;
    Register subtrahend = $op2$$Register;
    __ cmpq(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
12288 
// Long subtract overflow check, register - 32-bit immediate.  Encoded as
// cmpq, so only the flags are produced and $op1 is not written.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register minuend = $op1$$Register;
    __ cmpq(minuend, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12299 
// Int negate overflow check: matches OverflowSubI with a zero left operand
// and emits negl on $op2, which is overwritten (USE_KILL).
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2) %{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl    $op2\t# overflow check int" %}
  ins_encode %{
    Register value = $op2$$Register;
    __ negl(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12311 
// Long negate overflow check: matches OverflowSubL with a zero left operand
// and emits negq on $op2, which is overwritten (USE_KILL).
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) %{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq    $op2\t# overflow check long" %}
  ins_encode %{
    Register value = $op2$$Register;
    __ negq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12323 
// Int multiply overflow check, register * register.  The two-operand imull
// writes the product into $op1 (USE_KILL); the flags are the result.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register product = $op1$$Register;
    Register factor  = $op2$$Register;
    __ imull(product, factor);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12335 
// Int multiply overflow check, register * immediate.  The three-operand
// imull writes the product into the scratch $tmp, so $op1 is only read.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register scratch = $tmp$$Register;
    Register factor  = $op1$$Register;
    __ imull(scratch, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12347 
// Long multiply overflow check, register * register.  The two-operand imulq
// writes the product into $op1 (USE_KILL); the flags are the result.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register product = $op1$$Register;
    Register factor  = $op2$$Register;
    __ imulq(product, factor);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12359 
// Long multiply overflow check, register * 32-bit immediate.  The
// three-operand imulq writes the product into the scratch $tmp, so $op1 is
// only read.
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register scratch = $tmp$$Register;
    Register factor  = $op1$$Register;
    __ imulq(scratch, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12371 
12372 
12373 //----------Control Flow Instructions------------------------------------------
12374 // Signed compare Instructions
12375 
12376 // XXX more variants!!
// Signed int compare, register vs register.  The explicit effect list
// spells out that cr is defined and both operands are only read.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12388 
// Signed int compare, register vs immediate.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12399 
// Signed int compare, register vs int loaded from memory (the LoadI is
// folded into the cmpl memory operand).
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    Register lhs     = $op1$$Register;
    Address  rhs_mem = $op2$$Address;
    __ cmpl(lhs, rhs_mem);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12411 
// Signed int compare against zero, encoded as testl of the register with
// itself.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpI src zero));

  format %{ "testl   $src, $src" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12422 
// (src & con) compared with zero, folded into one testl with an immediate
// mask.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12433 
// (src1 & src2) compared with zero, folded into one register-register
// testl.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero) %{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl   $src1, $src2" %}
  ins_encode %{
    Register first  = $src1$$Register;
    Register second = $src2$$Register;
    __ testl(first, second);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12444 
// (src & int loaded from memory) compared with zero, folded into one testl
// with a memory operand.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero) %{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  ins_encode %{
    Register value = $src$$Register;
    Address  where = $mem$$Address;
    __ testl(value, where);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12455 
// Unsigned compare instructions.  They emit the same cmpl as the signed
// rules; the only difference is that the result is typed rFlagsRegU
// rather than rFlagsReg.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12468 
// Unsigned int compare, register vs immediate.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12479 
// Unsigned int compare, register vs int loaded from memory.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs     = $op1$$Register;
    Address  rhs_mem = $op2$$Address;
    __ cmpl(lhs, rhs_mem);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12491 
12492 // // // Cisc-spilled version of cmpU_rReg
12493 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
12494 // //%{
12495 // //  match(Set cr (CmpU (LoadI op1) op2));
12496 // //
12497 // //  format %{ "CMPu   $op1,$op2" %}
12498 // //  ins_cost(500);
12499 // //  opcode(0x39);  /* Opcode 39 /r */
12500 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12501 // //%}
12502 
// Unsigned int compare against zero, encoded as testl of the register with
// itself.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpU src zero));

  format %{ "testl   $src, $src\t# unsigned" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12513 
// Pointer compare, register vs register; produces unsigned flags.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12524 
// Pointer compare, register vs pointer loaded from memory.  Only used when
// the folded LoadP carries no barrier data.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  ins_cost(500); // XXX
  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    Register lhs     = $op1$$Register;
    Address  rhs_mem = $op2$$Address;
    __ cmpq(lhs, rhs_mem);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12537 
12538 // // // Cisc-spilled version of cmpP_rReg
12539 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12540 // //%{
12541 // //  match(Set cr (CmpP (LoadP op1) op2));
12542 // //
12543 // //  format %{ "CMPu   $op1,$op2" %}
12544 // //  ins_cost(500);
12545 // //  opcode(0x39);  /* Opcode 39 /r */
12546 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12547 // //%}
12548 
// XXX is this rule already covered by compP_rReg_mem???
// Compare against a raw pointer in memory (used in the out-of-heap check).
// This is valid only because a non-oop pointer has to be a raw pointer,
// and raw pointers carry no anti-dependencies.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
  ins_encode %{
    Register lhs     = $op1$$Register;
    Address  rhs_mem = $op2$$Address;
    __ cmpq(lhs, rhs_mem);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12565 
// Pointer null check on a register.  The result is typed as signed flags
// (rFlagsReg); that should be fine because a compare against zero is only
// expected to be consumed as eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) %{
  match(Set cr (CmpP src zero));

  format %{ "testq   $src, $src\t# ptr" %}
  ins_encode %{
    Register ptr = $src$$Register;
    __ testq(ptr, ptr);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12578 
// Pointer null check directly on memory.  The result is typed as signed
// flags (rFlagsReg); that should be fine because a compare against zero is
// only expected to be consumed as eq/neq.
// Applies unless compressed oops are on with a null base, and only when the
// folded LoadP carries no barrier data.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) %{
  predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    Address slot = $op$$Address;
    __ testq(slot, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12594 
// Pointer null check directly on memory for the case where compressed oops
// are on with a null base: compares the memory word against r12 (the format
// string notes R12_heapbase==0).  Requires a LoadP without barrier data.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmpq(r12, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12607 
// Compressed pointer compare, register vs register (32-bit cmpl).
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12616 
// Compressed pointer compare, register vs narrow oop loaded from memory.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    Address  slot   = $mem$$Address;
    __ cmpl(narrow, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12627 
// Compressed pointer compare, register vs narrow-oop constant.  Delegates
// to cmp_narrow_oop() with the constant passed as a jobject.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $op1$$Register;
    __ cmp_narrow_oop(narrow, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12637 
// Compressed pointer compare, narrow oop in memory vs narrow-oop constant.
// Delegates to cmp_narrow_oop() with the constant passed as a jobject.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmp_narrow_oop(slot, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12648 
// Compressed klass pointer compare, register vs narrow-klass constant.
// Delegates to cmp_narrow_klass() with the constant passed as a Klass*.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    Register narrow = $op1$$Register;
    __ cmp_narrow_klass(narrow, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12658 
// Compressed klass pointer compare, narrow klass in memory vs narrow-klass
// constant.  Disabled when UseCompactObjectHeaders is set.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{
  predicate(!UseCompactObjectHeaders);
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmp_narrow_klass(slot, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12670 
// Compressed pointer null check on a register, encoded as testl of the
// register with itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{
    Register narrow = $src$$Register;
    __ testl(narrow, narrow);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12678 
// Compressed pointer null check directly on memory, used when the
// compressed-oops base is non-null.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    // The rule matches a compare against zero, so ZF must be set exactly
    // when the loaded 32-bit value is zero.  TEST with an all-ones mask does
    // that; CMP against 0xFFFFFFFF would instead set ZF for the value -1.
    __ testl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12691 
// Compressed pointer null check directly on memory for a null
// compressed-oops base: compares the memory word against r12 (the format
// string notes R12_heapbase==0).
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  predicate(CompressedOops::base() == NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmpl(r12, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12703 
12704 // Yanked all unsigned pointer compare operations.
12705 // Pointer compares are done with CmpP which is already unsigned.
12706 
// Signed long compare, register vs register.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12717 
// Signed long compare, register vs 32-bit immediate.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12728 
// Signed long compare, register vs long loaded from memory.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) %{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    Register lhs     = $op1$$Register;
    Address  rhs_mem = $op2$$Address;
    __ cmpq(lhs, rhs_mem);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12739 
// Signed long compare against zero, encoded as testq of the register with
// itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12750 
// (src & con) compared with zero for longs, folded into one testq with a
// 32-bit immediate mask.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) %{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12761 
// (src1 & src2) compared with zero for longs, folded into one
// register-register testq.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero) %{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq   $src1, $src2\t# long" %}
  ins_encode %{
    Register first  = $src1$$Register;
    Register second = $src2$$Register;
    __ testq(first, second);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12772 
// (src & long loaded from memory) compared with zero, folded into one testq
// with a memory operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{
    Register value = $src$$Register;
    Address  where = $mem$$Address;
    __ testq(value, where);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12783 
// Same as the long register/memory test, but the register operand is a
// pointer viewed as an integer through CastP2X.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{
    Register ptr_bits = $src$$Register;
    Address  where    = $mem$$Address;
    __ testq(ptr_bits, where);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12794 
// Manifest a CmpU result in an integer register.  Very painful.
// This is the test to avoid.
// $dst is preloaded with -1 and kept if src1 is below src2 (unsigned);
// otherwise setne/movzbl leave 0 for equal and 1 for above.
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // The trailing tag names the matched node (CmpU3); it used to read CmpL3.
  format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    // movl does not touch the flags, so the branch still sees the compare.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
12820 
// Materialize a CmpL result (-1 / 0 / 1) in an integer register.
// Expensive; this is the form of test to avoid.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags) %{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "done:" %}
  ins_encode %{
    Register res = $dst$$Register;
    Label finished;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl(res, -1);                      // result if src1 < src2 (signed)
    __ jccb(Assembler::less, finished);
    __ setb(Assembler::notZero, res);      // 0 if equal, 1 otherwise
    __ movzbl(res, res);
    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
12846 
// Manifest a CmpUL result in an integer register.  Very painful.
// This is the test to avoid.
// $dst is preloaded with -1 and kept if src1 is below src2 (unsigned);
// otherwise setne/movzbl leave 0 for equal and 1 for above.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // The trailing tag names the matched node (CmpUL3); it used to read CmpL3.
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // movl does not touch the flags, so the branch still sees the compare.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
12872 
// Unsigned long compare instructions.  They emit the same cmpq as the
// signed long rules; the only difference is that the result is typed
// rFlagsRegU rather than rFlagsReg.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12885 
// Unsigned long compare, register vs 32-bit immediate.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12896 
// Unsigned long compare, register vs long loaded from memory.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2) %{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs     = $op1$$Register;
    Address  rhs_mem = $op2$$Address;
    __ cmpq(lhs, rhs_mem);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12907 
// Unsigned long compare against zero, encoded as testq of the register with
// itself.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12918 
// Compare a signed byte in memory with an 8-bit immediate using cmpb, so
// the LoadB needs no separate instruction.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm) %{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb    $mem, $imm" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ cmpb(slot, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12928 
// (unsigned byte in memory & immU7 mask) compared with zero, folded into a
// single testb on memory.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero) %{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# ubyte" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ testb(slot, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12938 
// (signed byte in memory & immI8 mask) compared with zero, folded into a
// single testb on memory.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero) %{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# byte" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ testb(slot, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12948 
12949 //----------Max and Min--------------------------------------------------------
12950 // Min Instructions
12951 
// Building block without a match rule: conditionally moves $src into $dst
// when the incoming flags say "greater".
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr) %{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    Register target = $dst$$Register;
    Register source = $src$$Register;
    __ cmovl(Assembler::greater, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
12962 
12963 
// MinI: expands into a compare of dst with src followed by a conditional
// move on "greater", using a flags register local to the expansion.
instruct minI_rReg(rRegI dst, rRegI src) %{
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
12975 
// Building block without a match rule: conditionally moves $src into $dst
// when the incoming flags say "less".
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr) %{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    Register target = $dst$$Register;
    Register source = $src$$Register;
    __ cmovl(Assembler::less, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
12986 
12987 
// MaxI: expands into a compare of dst with src followed by a conditional
// move on "less", using a flags register local to the expansion.
instruct maxI_rReg(rRegI dst, rRegI src) %{
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
12999 
13000 // ============================================================================
13001 // Branch Instructions
13002 
// Unconditional direct jump (Goto).  The label is an address relative to
// JMP+1.  Always emitted in the long form, hence the fixed size of 5.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp     $labl" %}
  size(5);
  ins_encode %{
    Label& target = *($labl$$label);
    __ jmp(target, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}
13018 
// Conditional direct jump on signed flags (If).  The label is an address
// relative to Jcc+1.  Always emitted in the long form, hence size 6.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl" %}
  size(6);
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    Label& target = *($labl$$label);
    __ jcc(cond, target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13034 
// Conditional direct jump that closes a counted loop (CountedLoopEnd).
// The label is an address relative to Jcc+1.  Always emitted in the long
// form, hence size 6.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    Label& target = *($labl$$label);
    __ jcc(cond, target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13050 
// Conditional direct jump driven by an unsigned comparison (cmpOpU on
// rFlagsRegU).  Always emitted in the long form, hence size 6.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    Label& target = *($labl$$label);
    __ jcc(cond, target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13065 
// Conditional direct jump for the cmpOpUCF / rFlagsRegUCF operand pair.
// Cheaper (ins_cost 200) than the plain unsigned rule; always emitted in
// the long form, hence size 6.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    Label& target = *($labl$$label);
    __ jcc(cond, target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13079 
// Conditional direct jump for cmpOpUCF2, which needs a parity test next to
// the eq/ne test:
//   notEqual: jump to the target on parity OR on notEqual;
//   equal:    skip over the jump on parity, otherwise jump on equal.
// Any other condition code is a matcher error.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u    $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u    done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label& target = *($labl$$label);
    if ($cop$$cmpcode == Assembler::equal) {
      // Parity set: fall through without taking the branch.
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jcc(Assembler::equal, target, false);
      __ bind(skip);
    } else if ($cop$$cmpcode == Assembler::notEqual) {
      // Parity set also counts as "not equal".
      __ jcc(Assembler::parity, target, false);
      __ jcc(Assembler::notEqual, target, false);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
13111 
13112 // ============================================================================
13113 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
13114 // superklass array for an instance of the superklass.  Set a hidden
13115 // internal cache on a hit (cache is checked with exposed code in
13116 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13117 // encoding ALSO sets flags.
13118 
// Value-producing form of the partial subtype check: the result register
// is zeroed on a hit (opcode 0x1 asks the shared encoding to emit the XOR).
// rcx and the flags are clobbered.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t Hit: rdi zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
13140 
// Flags-only form of the partial subtype check, matched when the result is
// immediately compared with null.  The result register is just scratch
// here (KILL result), so opcode 0x0 tells the shared encoding to skip the
// XOR.  rcx is clobbered as well.
instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
                                     rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                                     immP0 zero,
                                     rdi_RegP result) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL rcx, KILL result);

  ins_cost(1000);
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
            "jne,s   miss\t\t# Missed: flags nz\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
13162 
13163 // ============================================================================
13164 // Branch Instructions -- short offset versions
13165 //
13166 // These instructions are used to replace jumps of a long offset (the default
13167 // match) with jumps of a shorter offset.  These instructions are all tagged
13168 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13169 // match rules in general matching.  Instead, the ADLC generates a conversion
13170 // method in the MachNode which can be used to do in-place replacement of the
13171 // long variant with the shorter variant.  The compiler will determine if a
13172 // branch can be taken by the is_short_branch_offset() predicate in the machine
13173 // specific code section of the file.
13174 
13175 // Jump Direct - Label defines a relative address from JMP+1
13176 instruct jmpDir_short(label labl) %{
        // Short-offset form of the unconditional Goto branch. Tagged with
        // ins_short_branch, so it is not used in general matching; it only
        // replaces the long variant when the offset fits (see section comment).
13177   match(Goto);
13178   effect(USE labl);
13179
13180   ins_cost(300);
13181   format %{ "jmp,s   $labl" %}
        // Fixed two-byte encoding; must agree with what jmpb emits.
13182   size(2);
13183   ins_encode %{
13184     Label* L = $labl$$label;
13185     __ jmpb(*L);
13186   %}
13187   ins_pipe(pipe_jmp);
13188   ins_short_branch(1);
13189 %}
13190 
13191 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13192 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset conditional branch for an If node on signed flags.
        // Only used as an in-place replacement for the long variant.
13193   match(If cop cr);
13194   effect(USE labl);
13195
13196   ins_cost(300);
13197   format %{ "j$cop,s   $labl" %}
        // Fixed two-byte encoding; must agree with what jccb emits.
13198   size(2);
13199   ins_encode %{
13200     Label* L = $labl$$label;
          // The operand's cmpcode is used directly as the assembler condition.
13201     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13202   %}
13203   ins_pipe(pipe_jcc);
13204   ins_short_branch(1);
13205 %}
13206 
13207 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13208 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
        // Short-offset conditional branch closing a counted loop
        // (CountedLoopEnd). Encoding is identical to jmpCon_short.
13209   match(CountedLoopEnd cop cr);
13210   effect(USE labl);
13211
13212   ins_cost(300);
13213   format %{ "j$cop,s   $labl\t# loop end" %}
        // Fixed two-byte encoding; must agree with what jccb emits.
13214   size(2);
13215   ins_encode %{
13216     Label* L = $labl$$label;
13217     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13218   %}
13219   ins_pipe(pipe_jcc);
13220   ins_short_branch(1);
13221 %}
13222 
13223 // Jump Direct Conditional - using unsigned comparison
13224 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Short-offset conditional branch for an If node whose flags come from
        // an unsigned comparison (cmpOpU / rFlagsRegU operands).
13225   match(If cop cmp);
13226   effect(USE labl);
13227
13228   ins_cost(300);
13229   format %{ "j$cop,us  $labl" %}
        // Fixed two-byte encoding; must agree with what jccb emits.
13230   size(2);
13231   ins_encode %{
13232     Label* L = $labl$$label;
13233     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13234   %}
13235   ins_pipe(pipe_jcc);
13236   ins_short_branch(1);
13237 %}
13238 
13239 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset conditional branch for an If node using the cmpOpUCF /
        // rFlagsRegUCF operand classes; a single jccb suffices for these conditions.
        // NOTE(review): presumably these are flags from unordered floating-point
        // compares - confirm against the operand definitions.
13240   match(If cop cmp);
13241   effect(USE labl);
13242
13243   ins_cost(300);
13244   format %{ "j$cop,us  $labl" %}
        // Fixed two-byte encoding; must agree with what jccb emits.
13245   size(2);
13246   ins_encode %{
13247     Label* L = $labl$$label;
13248     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13249   %}
13250   ins_pipe(pipe_jcc);
13251   ins_short_branch(1);
13252 %}
13253 
13254 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short-offset conditional branch for cmpOpUCF2 conditions, which need
        // two jccb instructions because the parity flag must be tested as well.
        // Only the equal and notEqual conditions are supported by the encoding.
        // NOTE(review): presumably parity set means an unordered (NaN) compare -
        // confirm against the producers of rFlagsRegUCF.
13255   match(If cop cmp);
13256   effect(USE labl);
13257
13258   ins_cost(300);
13259   format %{ $$template
13260     if ($cop$$cmpcode == Assembler::notEqual) {
13261       $$emit$$"jp,u,s  $labl\n\t"
13262       $$emit$$"j$cop,u,s  $labl"
13263     } else {
13264       $$emit$$"jp,u,s  done\n\t"
13265       $$emit$$"j$cop,u,s  $labl\n\t"
13266       $$emit$$"done:"
13267     }
13268   %}
        // Two short conditional jumps of two bytes each.
13269   size(4);
13270   ins_encode %{
13271     Label* l = $labl$$label;
13272     if ($cop$$cmpcode == Assembler::notEqual) {
            // notEqual: branch to the target on parity OR on notEqual.
13273       __ jccb(Assembler::parity, *l);
13274       __ jccb(Assembler::notEqual, *l);
13275     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: parity skips the branch; otherwise branch on equal.
13276       Label done;
13277       __ jccb(Assembler::parity, done);
13278       __ jccb(Assembler::equal, *l);
13279       __ bind(done);
13280     } else {
13281        ShouldNotReachHere();
13282     }
13283   %}
13284   ins_pipe(pipe_jcc);
13285   ins_short_branch(1);
13286 %}
13287 
13288 // ============================================================================
13289 // inlined locking and unlocking
13290 
13291 instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
        // Inlined monitor enter, selected only when the current compilation uses
        // RTM. The outcome is delivered in the flags register (cr).
13292   predicate(Compile::current()->use_rtm());
13293   match(Set cr (FastLock object box));
        // box is both read and clobbered; the remaining registers are scratch.
13294   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13295   ins_cost(300);
13296   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13297   ins_encode %{
          // Passes the RTM counters and the compiled method's MethodData, with
          // the RTM flag set to true and profiling taken from the compilation.
13298     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13299                  $scr$$Register, $cx1$$Register, $cx2$$Register, r15_thread,
13300                  _rtm_counters, _stack_rtm_counters,
13301                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13302                  true, ra_->C->profile_rtm());
13303   %}
13304   ins_pipe(pipe_slow);
13305 %}
13306 
13307 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
        // Inlined monitor enter for the non-RTM, non-lightweight locking modes.
        // The outcome is delivered in the flags register (cr).
13308   predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
13309   match(Set cr (FastLock object box));
        // box is both read and clobbered; tmp and scr are scratch.
13310   effect(TEMP tmp, TEMP scr, USE_KILL box);
13311   ins_cost(300);
13312   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
13313   ins_encode %{
          // Same helper as the RTM rule, with the RTM-only arguments disabled
          // (noreg / nullptr / false).
13314     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13315                  $scr$$Register, noreg, noreg, r15_thread, nullptr, nullptr, nullptr, false, false);
13316   %}
13317   ins_pipe(pipe_slow);
13318 %}
13319 
13320 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
        // Inlined monitor exit for every locking mode except LM_LIGHTWEIGHT.
        // Covers both RTM and non-RTM compilations: the RTM choice is passed to
        // fast_unlock at encode time rather than selected by the predicate.
13321   predicate(LockingMode != LM_LIGHTWEIGHT);
13322   match(Set cr (FastUnlock object box));
        // box (pinned to rax) is both read and clobbered; tmp is scratch.
13323   effect(TEMP tmp, USE_KILL box);
13324   ins_cost(300);
13325   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
13326   ins_encode %{
13327     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13328   %}
13329   ins_pipe(pipe_slow);
13330 %}
13331 
13332 instruct cmpFastLockLightweight(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
        // Inlined monitor enter used when LockingMode is LM_LIGHTWEIGHT.
        // The outcome is delivered in the flags register (cr).
13333   predicate(LockingMode == LM_LIGHTWEIGHT);
13334   match(Set cr (FastLock object box));
        // rax and tmp are scratch; box is both read and clobbered.
13335   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
13336   ins_cost(300);
13337   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
13338   ins_encode %{
13339     __ fast_lock_lightweight($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
13340   %}
13341   ins_pipe(pipe_slow);
13342 %}
13343 
13344 instruct cmpFastUnlockLightweight(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
        // Inlined monitor exit used when LockingMode is LM_LIGHTWEIGHT.
        // The FastUnlock box input is bound to rax and is clobbered.
13345   predicate(LockingMode == LM_LIGHTWEIGHT);
13346   match(Set cr (FastUnlock object rax_reg));
13347   effect(TEMP tmp, USE_KILL rax_reg);
13348   ins_cost(300);
13349   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
13350   ins_encode %{
13351     __ fast_unlock_lightweight($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
13352   %}
13353   ins_pipe(pipe_slow);
13354 %}
13355 
13356 
13357 // ============================================================================
13358 // Safepoint Instructions
13359 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
13360 %{
        // Safepoint poll: a testl of rax against the memory addressed by the
        // poll register. Only the flags are modified (KILL cr).
13361   match(SafePoint poll);
13362   effect(KILL cr, USE poll);
13363
13364   format %{ "testl   rax, [$poll]\t"
13365             "# Safepoint: poll for GC" %}
13366   ins_cost(125);
13367   size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13368   ins_encode %{
          // Attach the poll relocation to the instruction emitted next, and
          // remember its start so the assert below can inspect it.
13369     __ relocate(relocInfo::poll_type);
13370     address pre_pc = __ pc();
13371     __ testl(rax, Address($poll$$Register, 0));
          // Debug check that the emitted bytes are recognised as a safepoint poll.
13372     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
13373   %}
13374   ins_pipe(ialu_reg_mem);
13375 %}
13376 
13377 instruct mask_all_evexL(kReg dst, rRegL src) %{
        // MaskAll with a long source: fills the opmask register dst from src
        // for the vector length of this node.
13378   match(Set dst (MaskAll src));
13379   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
13380   ins_encode %{
13381     int mask_len = Matcher::vector_length(this);
13382     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13383   %}
13384   ins_pipe( pipe_slow );
13385 %}
13386 
13387 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
        // MaskAll with an int source for vectors of more than 32 elements: the
        // int is first widened into a long TEMP, then handled like the long form.
13388   predicate(Matcher::vector_length(n) > 32);
13389   match(Set dst (MaskAll src));
13390   effect(TEMP tmp);
13391   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
13392   ins_encode %{
13393     int mask_len = Matcher::vector_length(this);
          // Widen src into the 64-bit temporary before building the mask.
13394     __ movslq($tmp$$Register, $src$$Register);
13395     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
13396   %}
13397   ins_pipe( pipe_slow );
13398 %}
13399 
13400 // ============================================================================
13401 // Procedure Call/Return Instructions
13402 // Call Java Static Instruction
13403 // Note: If this code changes, the corresponding ret_addr_offset() and
13404 //       compute_padding() functions will have to be adjusted.
13405 instruct CallStaticJavaDirect(method meth) %{
        // Direct call for CallStaticJava nodes. The encoding is the sequence
        // clear_avx, Java_Static_Call, call_epilog (encode classes defined
        // elsewhere in this file).
13406   match(CallStaticJava);
13407   effect(USE meth);
13408
13409   ins_cost(300);
13410   format %{ "call,static " %}
13411   opcode(0xE8); /* E8 cd */
13412   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
13413   ins_pipe(pipe_slow);
        // Alignment requirement; tied to compute_padding() per the note above.
13414   ins_alignment(4);
13415 %}
13416 
13417 // Call Java Dynamic Instruction
13418 // Note: If this code changes, the corresponding ret_addr_offset() and
13419 //       compute_padding() functions will have to be adjusted.
13420 instruct CallDynamicJavaDirect(method meth)
13421 %{
        // Call for CallDynamicJava nodes. Per the format string, rax is loaded
        // with Universe::non_oop_word() before the call instruction.
13422   match(CallDynamicJava);
13423   effect(USE meth);
13424
13425   ins_cost(300);
13426   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
13427             "call,dynamic " %}
13428   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
13429   ins_pipe(pipe_slow);
        // Alignment requirement; tied to compute_padding() per the note above.
13430   ins_alignment(4);
13431 %}
13432 
13433 // Call Runtime Instruction
13434 instruct CallRuntimeDirect(method meth)
13435 %{
        // Call into the VM runtime for CallRuntime nodes. Unlike the Java call
        // rules there is no call_epilog and no alignment requirement.
13436   match(CallRuntime);
13437   effect(USE meth);
13438
13439   ins_cost(300);
13440   format %{ "call,runtime " %}
13441   ins_encode(clear_avx, Java_To_Runtime(meth));
13442   ins_pipe(pipe_slow);
13443 %}
13444 
13445 // Call runtime without safepoint
13446 instruct CallLeafDirect(method meth)
13447 %{
        // Leaf runtime call (CallLeaf). Uses the same encoding as
        // CallRuntimeDirect; only the matched node and format text differ.
13448   match(CallLeaf);
13449   effect(USE meth);
13450
13451   ins_cost(300);
13452   format %{ "call_leaf,runtime " %}
13453   ins_encode(clear_avx, Java_To_Runtime(meth));
13454   ins_pipe(pipe_slow);
13455 %}
13456 
13457 // Call runtime without safepoint and with vector arguments
13458 instruct CallLeafDirectVector(method meth)
13459 %{
        // Leaf runtime call that passes vector arguments (CallLeafVector).
        // This is the only call rule here whose encoding omits clear_avx.
        // NOTE(review): presumably because clearing the upper vector state
        // would destroy the vector arguments - confirm against clear_avx.
13460   match(CallLeafVector);
13461   effect(USE meth);
13462
13463   ins_cost(300);
13464   format %{ "call_leaf,vector " %}
13465   ins_encode(Java_To_Runtime(meth));
13466   ins_pipe(pipe_slow);
13467 %}
13468 
13469 // Call runtime without safepoint
13470 instruct CallLeafNoFPDirect(method meth)
13471 %{
        // Leaf runtime call for CallLeafNoFP nodes. Uses the same encoding as
        // CallLeafDirect; only the matched node and format text differ.
13472   match(CallLeafNoFP);
13473   effect(USE meth);
13474
13475   ins_cost(300);
13476   format %{ "call_leaf_nofp,runtime " %}
13477   ins_encode(clear_avx, Java_To_Runtime(meth));
13478   ins_pipe(pipe_slow);
13479 %}
13480 
13481 // Return Instruction
13482 // Remove the return address & jump to it.
13483 // Notice: We always emit a nop after a ret to make sure there is room
13484 // for safepoint patching
13485 instruct Ret()
13486 %{
        // Matches the ideal Return node and emits a single ret.
        // NOTE(review): the header comment says a nop always follows the ret,
        // but this encoding emits only the ret itself - confirm whether that
        // note is stale.
13487   match(Return);
13488
13489   format %{ "ret" %}
13490   ins_encode %{
13491     __ ret(0);
13492   %}
13493   ins_pipe(pipe_jmp);
13494 %}
13495 
13496 // Tail Call; Jump from runtime stub to Java code.
13497 // Also known as an 'interprocedural jump'.
13498 // Target of jump will eventually return to caller.
13499 // TailJump below removes the return address.
13500 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
13501 %{
        // Indirect jump to jump_target with the method pointer pinned to rbx.
        // The return address stays on the stack (contrast tailjmpInd below).
        // jump_target is drawn from the no_rbp_RegP operand class.
13502   match(TailCall jump_target method_ptr);
13503
13504   ins_cost(300);
13505   format %{ "jmp     $jump_target\t# rbx holds method" %}
13506   ins_encode %{
13507     __ jmp($jump_target$$Register);
13508   %}
13509   ins_pipe(pipe_jmp);
13510 %}
13511 
13512 // Tail Jump; remove the return address; jump to target.
13513 // TailCall above leaves the return address around.
13514 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
13515 %{
        // Pops the return address off the stack, then jumps to jump_target.
        // The exception oop operand is pinned to rax.
13516   match(TailJump jump_target ex_oop);
13517
13518   ins_cost(300);
13519   format %{ "popq    rdx\t# pop return address\n\t"
13520             "jmp     $jump_target" %}
13521   ins_encode %{
          // The popped return address lands in rdx.
13522     __ popq(as_Register(RDX_enc));
13523     __ jmp($jump_target$$Register);
13524   %}
13525   ins_pipe(pipe_jmp);
13526 %}
13527 
13528 // Create exception oop: created by stack-crawling runtime code.
13529 // Created exception is now available to this handler, and is setup
13530 // just prior to jumping to this handler.  No code emitted.
13531 instruct CreateException(rax_RegP ex_oop)
13532 %{
        // Defines the exception oop as living in rax at handler entry. The
        // encoding is empty and the size is zero: no machine code is emitted.
13533   match(Set ex_oop (CreateEx));
13534
13535   size(0);
13536   // use the following format syntax
13537   format %{ "# exception oop is in rax; no code emitted" %}
13538   ins_encode();
13539   ins_pipe(empty);
13540 %}
13541 
13542 // Rethrow exception:
13543 // The exception oop will come in the first argument position.
13544 // Then JUMP (not call) to the rethrow stub code.
13545 instruct RethrowException()
13546 %{
        // Matches the ideal Rethrow node; the jump to the rethrow stub is
        // produced by the enc_rethrow encode class defined elsewhere in this file.
13547   match(Rethrow);
13548
13549   // use the following format syntax
13550   format %{ "jmp     rethrow_stub" %}
13551   ins_encode(enc_rethrow);
13552   ins_pipe(pipe_jmp);
13553 %}
13554 
13555 // ============================================================================
13556 // This name is KNOWN by the ADLC and cannot be changed.
13557 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13558 // for this guy.
13559 instruct tlsLoadP(r15_RegP dst) %{
        // Defines the ThreadLocal value as living in r15. The encoding is empty
        // and the size is zero: the thread pointer is already in that register.
13560   match(Set dst (ThreadLocal));
13561   effect(DEF dst);
13562
13563   size(0);
13564   format %{ "# TLS is in R15" %}
13565   ins_encode( /*empty encoding*/ );
13566   ins_pipe(ialu_reg_reg);
13567 %}
13568 
13569 
13570 //----------PEEPHOLE RULES-----------------------------------------------------
13571 // These must follow all instruction definitions as they use the names
13572 // defined in the instructions definitions.
13573 //
13574 // peeppredicate ( rule_predicate );
13575 // // the predicate that must hold; otherwise the peephole rule is ignored
13576 //
13577 // peepmatch ( root_instr_name [preceding_instruction]* );
13578 //
13579 // peepprocedure ( procedure_name );
13580 // // provide a procedure name to perform the optimization. The procedure should
13581 // // reside in the architecture dependent peephole file. The method has the
13582 // // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
13583 // // with the arguments being the basic block, the current node index inside the
13584 // // block, the register allocator, a function that, when invoked, returns a new
13585 // // node defined in peepreplace, and the rules of the nodes appearing in the
13586 // // corresponding peepmatch. The function returns true if successful, else
13587 // // it returns false
13588 //
13589 // peepconstraint %{
13590 // (instruction_number.operand_name relational_op instruction_number.operand_name
13591 //  [, ...] );
13592 // // instruction numbers are zero-based using left to right order in peepmatch
13593 //
13594 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13595 // // provide an instruction_number.operand_name for each operand that appears
13596 // // in the replacement instruction's match rule
13597 //
13598 // ---------VM FLAGS---------------------------------------------------------
13599 //
13600 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13601 //
13602 // Each peephole rule is given an identifying number starting with zero and
13603 // increasing by one in the order seen by the parser.  An individual peephole
13604 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13605 // on the command-line.
13606 //
13607 // ---------CURRENT LIMITATIONS----------------------------------------------
13608 //
13609 // Only transformations inside a basic block (do we need more for peephole?)
13610 //
13611 // ---------EXAMPLE----------------------------------------------------------
13612 //
13613 // // pertinent parts of existing instructions in architecture description
13614 // instruct movI(rRegI dst, rRegI src)
13615 // %{
13616 //   match(Set dst (CopyI src));
13617 // %}
13618 //
13619 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
13620 // %{
13621 //   match(Set dst (AddI dst src));
13622 //   effect(KILL cr);
13623 // %}
13624 //
13625 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
13626 // %{
13627 //   match(Set dst (AddI dst src));
13628 // %}
13629 //
13630 // 1. Simple replacement
13631 // - Only match adjacent instructions in same basic block
13632 // - Only equality constraints
13633 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
13634 // - Only one replacement instruction
13635 //
13636 // // Change (inc mov) to lea
13637 // peephole %{
13638 //   // lea should only be emitted when beneficial
13639 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13640 //   // increment preceded by register-register move
13641 //   peepmatch ( incI_rReg movI );
13642 //   // require that the destination register of the increment
13643 //   // match the destination register of the move
13644 //   peepconstraint ( 0.dst == 1.dst );
13645 //   // construct a replacement instruction that sets
13646 //   // the destination to ( move's source register + one )
13647 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13648 // %}
13649 //
13650 // 2. Procedural replacement
13651 // - More flexible in finding relevant nodes
13652 // - More flexible constraints
13653 // - More flexible transformations
13654 // - May utilise architecture-dependent API more effectively
13655 // - Currently only one replacement instruction due to adlc parsing capabilities
13656 //
13657 // // Change (inc mov) to lea
13658 // peephole %{
13659 //   // lea should only be emitted when beneficial
13660 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13661 //   // the rule numbers of these nodes inside are passed into the function below
13662 //   peepmatch ( incI_rReg movI );
13663 //   // the method that takes the responsibility of transformation
13664 //   peepprocedure ( inc_mov_to_lea );
13665 //   // the replacement is a leaI_rReg_immI; a lambda that creates this node
13666 //   // when invoked is passed into the function above
13667 //   peepreplace ( leaI_rReg_immI() );
13668 // %}
13669 
13670 // These instructions are not matched by the matcher but are used by the peephole rules
13671 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
13672 %{
        // 32-bit lea computing dst = src1 + src2. predicate(false) keeps the
        // matcher from ever selecting this rule; it exists only as a
        // replacement node for the peephole rules.
13673   predicate(false);
13674   match(Set dst (AddI src1 src2));
13675   format %{ "leal    $dst, [$src1 + $src2]" %}
13676   ins_encode %{
13677     Register dst = $dst$$Register;
13678     Register src1 = $src1$$Register;
13679     Register src2 = $src2$$Register;
          // rbp and r13 are avoided as the base register (presumably because
          // they need an explicit displacement in the address encoding), so
          // the operands are swapped when src1 is one of them.
13680     if (src1 != rbp && src1 != r13) {
13681       __ leal(dst, Address(src1, src2, Address::times_1));
13682     } else {
13683       assert(src2 != rbp && src2 != r13, "src1 and src2 must not both be rbp/r13");
13684       __ leal(dst, Address(src2, src1, Address::times_1));
13685     }
13686   %}
13687   ins_pipe(ialu_reg_reg);
13688 %}
13689 
13690 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
13691 %{
        // 32-bit lea computing dst = src1 + constant. predicate(false) keeps
        // the matcher from selecting it; it is only a peephole replacement node.
13692   predicate(false);
13693   match(Set dst (AddI src1 src2));
13694   format %{ "leal    $dst, [$src1 + $src2]" %}
13695   ins_encode %{
          // The immediate becomes the displacement of the address operand.
13696     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
13697   %}
13698   ins_pipe(ialu_reg_reg);
13699 %}
13700 
13701 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
13702 %{
        // 32-bit lea computing dst = src << shift. predicate(false) keeps the
        // matcher from selecting it; it is only a peephole replacement node.
13703   predicate(false);
13704   match(Set dst (LShiftI src shift));
13705   format %{ "leal    $dst, [$src << $shift]" %}
13706   ins_encode %{
          // The shift amount is reinterpreted directly as the address scale factor.
13707     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
13708     Register src = $src$$Register;
          // A times_2 scale is emitted as [src + src] unless src is rbp or r13;
          // every other case uses the index-only form [src * scale].
13709     if (scale == Address::times_2 && src != rbp && src != r13) {
13710       __ leal($dst$$Register, Address(src, src, Address::times_1));
13711     } else {
13712       __ leal($dst$$Register, Address(noreg, src, scale));
13713     }
13714   %}
13715   ins_pipe(ialu_reg_reg);
13716 %}
13717 
13718 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
13719 %{
        // 64-bit lea computing dst = src1 + src2. predicate(false) keeps the
        // matcher from ever selecting this rule; it exists only as a
        // replacement node for the peephole rules.
13720   predicate(false);
13721   match(Set dst (AddL src1 src2));
13722   format %{ "leaq    $dst, [$src1 + $src2]" %}
13723   ins_encode %{
13724     Register dst = $dst$$Register;
13725     Register src1 = $src1$$Register;
13726     Register src2 = $src2$$Register;
          // rbp and r13 are avoided as the base register (presumably because
          // they need an explicit displacement in the address encoding), so
          // the operands are swapped when src1 is one of them.
13727     if (src1 != rbp && src1 != r13) {
13728       __ leaq(dst, Address(src1, src2, Address::times_1));
13729     } else {
13730       assert(src2 != rbp && src2 != r13, "src1 and src2 must not both be rbp/r13");
13731       __ leaq(dst, Address(src2, src1, Address::times_1));
13732     }
13733   %}
13734   ins_pipe(ialu_reg_reg);
13735 %}
13736 
13737 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
13738 %{
        // 64-bit lea computing dst = src1 + constant (immL32 operand).
        // predicate(false) keeps the matcher from selecting it; it is only a
        // peephole replacement node.
13739   predicate(false);
13740   match(Set dst (AddL src1 src2));
13741   format %{ "leaq    $dst, [$src1 + $src2]" %}
13742   ins_encode %{
          // The immediate becomes the displacement of the address operand.
13743     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
13744   %}
13745   ins_pipe(ialu_reg_reg);
13746 %}
13747 
13748 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
13749 %{
        // 64-bit lea computing dst = src << shift. predicate(false) keeps the
        // matcher from selecting it; it is only a peephole replacement node.
13750   predicate(false);
13751   match(Set dst (LShiftL src shift));
13752   format %{ "leaq    $dst, [$src << $shift]" %}
13753   ins_encode %{
          // The shift amount is reinterpreted directly as the address scale factor.
13754     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
13755     Register src = $src$$Register;
          // A times_2 scale is emitted as [src + src] unless src is rbp or r13;
          // every other case uses the index-only form [src * scale].
13756     if (scale == Address::times_2 && src != rbp && src != r13) {
13757       __ leaq($dst$$Register, Address(src, src, Address::times_1));
13758     } else {
13759       __ leaq($dst$$Register, Address(noreg, src, scale));
13760     }
13761   %}
13762   ins_pipe(ialu_reg_reg);
13763 %}
13764 
13765 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
13766 // sal}) with lea instructions. The {add, sal} rules are beneficial in
13767 // processors with at least partial ALU support for lea
13768 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
13769 // beneficial for processors with full ALU support
13770 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
13771 
13772 peephole
13773 %{
        // int add (reg, reg): when fast 2-operand lea is available, the
        // lea_coalesce_reg procedure is given the addI_rReg node, with
        // leaI_rReg_rReg_peep as the replacement node.
13774   peeppredicate(VM_Version::supports_fast_2op_lea());
13775   peepmatch (addI_rReg);
13776   peepprocedure (lea_coalesce_reg);
13777   peepreplace (leaI_rReg_rReg_peep());
13778 %}
13779 
13780 peephole
13781 %{
        // int add (reg, imm): when fast 2-operand lea is available, the
        // lea_coalesce_imm procedure is given the addI_rReg_imm node, with
        // leaI_rReg_immI_peep as the replacement node.
13782   peeppredicate(VM_Version::supports_fast_2op_lea());
13783   peepmatch (addI_rReg_imm);
13784   peepprocedure (lea_coalesce_imm);
13785   peepreplace (leaI_rReg_immI_peep());
13786 %}
13787 
13788 peephole
13789 %{
        // int increment: enabled only with fast 3-operand lea or on Intel
        // Cascade Lake. lea_coalesce_imm is given the incI_rReg node, with
        // leaI_rReg_immI_peep as the replacement node.
13790   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13791                 VM_Version::is_intel_cascade_lake());
13792   peepmatch (incI_rReg);
13793   peepprocedure (lea_coalesce_imm);
13794   peepreplace (leaI_rReg_immI_peep());
13795 %}
13796 
13797 peephole
13798 %{
        // int decrement: enabled only with fast 3-operand lea or on Intel
        // Cascade Lake. lea_coalesce_imm is given the decI_rReg node, with
        // leaI_rReg_immI_peep as the replacement node.
13799   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13800                 VM_Version::is_intel_cascade_lake());
13801   peepmatch (decI_rReg);
13802   peepprocedure (lea_coalesce_imm);
13803   peepreplace (leaI_rReg_immI_peep());
13804 %}
13805 
13806 peephole
13807 %{
        // int left shift by an immI2 constant: when fast 2-operand lea is
        // available, lea_coalesce_imm is given the salI_rReg_immI2 node, with
        // leaI_rReg_immI2_peep as the replacement node.
13808   peeppredicate(VM_Version::supports_fast_2op_lea());
13809   peepmatch (salI_rReg_immI2);
13810   peepprocedure (lea_coalesce_imm);
13811   peepreplace (leaI_rReg_immI2_peep());
13812 %}
13813 
13814 peephole
13815 %{
        // long add (reg, reg): when fast 2-operand lea is available, the
        // lea_coalesce_reg procedure is given the addL_rReg node, with
        // leaL_rReg_rReg_peep as the replacement node.
13816   peeppredicate(VM_Version::supports_fast_2op_lea());
13817   peepmatch (addL_rReg);
13818   peepprocedure (lea_coalesce_reg);
13819   peepreplace (leaL_rReg_rReg_peep());
13820 %}
13821 
13822 peephole
13823 %{
        // long add (reg, imm): when fast 2-operand lea is available, the
        // lea_coalesce_imm procedure is given the addL_rReg_imm node, with
        // leaL_rReg_immL32_peep as the replacement node.
13824   peeppredicate(VM_Version::supports_fast_2op_lea());
13825   peepmatch (addL_rReg_imm);
13826   peepprocedure (lea_coalesce_imm);
13827   peepreplace (leaL_rReg_immL32_peep());
13828 %}
13829 
13830 peephole
13831 %{
        // long increment: enabled only with fast 3-operand lea or on Intel
        // Cascade Lake. lea_coalesce_imm is given the incL_rReg node, with
        // leaL_rReg_immL32_peep as the replacement node.
13832   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13833                 VM_Version::is_intel_cascade_lake());
13834   peepmatch (incL_rReg);
13835   peepprocedure (lea_coalesce_imm);
13836   peepreplace (leaL_rReg_immL32_peep());
13837 %}
13838 
13839 peephole
13840 %{
        // long decrement: enabled only with fast 3-operand lea or on Intel
        // Cascade Lake. lea_coalesce_imm is given the decL_rReg node, with
        // leaL_rReg_immL32_peep as the replacement node.
13841   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13842                 VM_Version::is_intel_cascade_lake());
13843   peepmatch (decL_rReg);
13844   peepprocedure (lea_coalesce_imm);
13845   peepreplace (leaL_rReg_immL32_peep());
13846 %}
13847 
13848 peephole
13849 %{
        // long left shift by an immI2 constant: when fast 2-operand lea is
        // available, lea_coalesce_imm is given the salL_rReg_immI2 node, with
        // leaL_rReg_immI2_peep as the replacement node.
13850   peeppredicate(VM_Version::supports_fast_2op_lea());
13851   peepmatch (salL_rReg_immI2);
13852   peepprocedure (lea_coalesce_imm);
13853   peepreplace (leaL_rReg_immI2_peep());
13854 %}
13855 
13856 //----------SMARTSPILL RULES---------------------------------------------------
13857 // These must follow all instruction definitions as they use the names
13858 // defined in the instructions definitions.