1 //
    2 // Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding, VMReg );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
// Every general register below is described by two reg_defs that share one
// hardware encoding: NAME is bound to the register's own VMReg and NAME_H to
// the VMReg slot that follows it (as_VMReg()->next()).
// Columns: (Java save type, C convention save type, ideal type, encoding, VMReg).
reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// The stack pointer is No-Save in both conventions.
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

// RSI and RDI differ only in the C convention column: Save-On-Entry when
// _WIN64 is defined, Save-On-Call otherwise.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

// R8-R11 are Save-On-Call in both conventions.
reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15 are Save-On-Call for Java code but Save-On-Entry in the C convention.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 
  132 // Floating Point Registers
  133 
  134 // Specify priority of register selection within phases of register
  135 // allocation.  Highest priority is first.  A useful heuristic is to
  136 // give registers a low priority when they are required by machine
  137 // instructions, like EAX and EDX on I486, and choose no-save registers
  138 // before save-on-call, & save-on-call before save-on-entry.  Registers
  139 // which participate in fixed calling sequences should come last.
  140 // Registers which are used as pairs must fall on an even boundary.
  141 
// Allocation order for the general registers; the sequence is significant
// (highest priority first).  Each register is listed together with its _H
// half so that pairs stay adjacent.  RSP comes last.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);
  158 
  159 
  160 //----------Architecture Description Register Classes--------------------------
  161 // Several register classes are automatically defined based upon information in
  162 // this architecture description.
  163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  164 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  165 //
  166 
// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers: every general register together with
// its _H half, RSP and R15 included.  reg_mask_init() copies the adlc-generated
// _ALL_REG_mask of this class and derives the narrower pointer/long masks
// from it.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);
  187 
  188 // Class for all int registers
  189 reg_class all_int_reg(RAX
  190                       RDX,
  191                       RBP,
  192                       RDI,
  193                       RSI,
  194                       RCX,
  195                       RBX,
  196                       R8,
  197                       R9,
  198                       R10,
  199                       R11,
  200                       R12,
  201                       R13,
  202                       R14);
  203 
// The classes below return masks that reg_mask_init() computes at startup.

// Class for all pointer registers: all_reg, minus RBP when
// PreserveFramePointer is set and minus R12 when need_r12_heapbase() is true.
// RSP and R15 are still members.
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (any_reg excluding RSP and R15)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (ptr_reg excluding RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (ptr_reg excluding RAX)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (ptr_reg excluding RAX and RBX)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (same register set as ptr_reg)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (long_reg excluding RAX and RDX)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (long_reg excluding RCX)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (long_reg excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers: all_int_reg (which has no RSP or R15), minus
// RBP when PreserveFramePointer is set and minus R12 when
// need_r12_heapbase() is true.
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (int_reg excluding RAX and RDX)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (int_reg excluding RCX)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (int_reg excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
  268 
// Fixed-register classes.  The pointer and long variants name both halves of
// the register (REG, REG_H); the int variants name only REG.

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer (R15)
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);
  313 
  314 // Singleton class for instruction pointer
  315 // reg_class ip_reg(RIP);
  316 
  317 %}
  318 
  319 //----------SOURCE BLOCK-------------------------------------------------------
  320 // This is a block of C++ code which provides values, functions, and
  321 // definitions necessary in the rest of the architecture description
  322 
  323 source_hpp %{
  324 
  325 #include "peephole_x86_64.hpp"
  326 
  327 %}
  328 
  329 // Register masks
  330 source_hpp %{
  331 
// Masks returned by the mask-backed reg_class definitions.  They are defined
// in the source block and filled in by reg_mask_init().
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// The pointer/long/int register masks OR-ed with STACK_OR_STACK_SLOTS_mask().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;
  350 
  351 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
  352 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
  353 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
  354 
  355 %}
  356 
  357 source %{
// Short names for the Assembler operand kinds that are passed as the "format"
// argument of the relocating emit helpers below.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// "__ foo(...)" expands to "_masm.foo(...)", so functions that use it need a
// MacroAssembler named _masm in scope.
#define __ _masm.
  362 
// Storage for the masks declared extern in source_hpp.  They are
// default-constructed here and given their contents by reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
  380 
  381 static bool need_r12_heapbase() {
  382   return UseCompressedOops;
  383 }
  384 
  385 void reg_mask_init() {
  386   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
  387   // We derive a number of subsets from it.
  388   _ANY_REG_mask = _ALL_REG_mask;
  389 
  390   if (PreserveFramePointer) {
  391     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  392     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  393   }
  394   if (need_r12_heapbase()) {
  395     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  396     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
  397   }
  398 
  399   _PTR_REG_mask = _ANY_REG_mask;
  400   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
  401   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
  402   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
  403   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
  404 
  405   _STACK_OR_PTR_REG_mask = _PTR_REG_mask;
  406   _STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  407 
  408   _PTR_REG_NO_RBP_mask = _PTR_REG_mask;
  409   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  410   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  411 
  412   _PTR_NO_RAX_REG_mask = _PTR_REG_mask;
  413   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  414   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  415 
  416   _PTR_NO_RAX_RBX_REG_mask = _PTR_NO_RAX_REG_mask;
  417   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  418   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
  419 
  420   _LONG_REG_mask = _PTR_REG_mask;
  421   _STACK_OR_LONG_REG_mask = _LONG_REG_mask;
  422   _STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  423 
  424   _LONG_NO_RAX_RDX_REG_mask = _LONG_REG_mask;
  425   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  426   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  427   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  428   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
  429 
  430   _LONG_NO_RCX_REG_mask = _LONG_REG_mask;
  431   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  432   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
  433 
  434   _LONG_NO_RBP_R13_REG_mask = _LONG_REG_mask;
  435   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  436   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  437   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  438   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
  439 
  440   _INT_REG_mask = _ALL_INT_REG_mask;
  441   if (PreserveFramePointer) {
  442     _INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  443   }
  444   if (need_r12_heapbase()) {
  445     _INT_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  446   }
  447 
  448   _STACK_OR_INT_REG_mask = _INT_REG_mask;
  449   _STACK_OR_INT_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  450 
  451   _INT_NO_RAX_RDX_REG_mask = _INT_REG_mask;
  452   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  453   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  454 
  455   _INT_NO_RCX_REG_mask = _INT_REG_mask;
  456   _INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  457 
  458   _INT_NO_RBP_R13_REG_mask = _INT_REG_mask;
  459   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  460   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  461 
  462   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  463   // from the float_reg_legacy/float_reg_evex register class.
  464   _FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
  465 }
  466 
  467 static bool generate_vzeroupper(Compile* C) {
  468   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
  469 }
  470 
  471 static int clear_avx_size() {
  472   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {
  493   int offset = 13; // movq r10,#addr; callq (r10)
  494   if (this->ideal_Opcode() != Op_CallLeafVector) {
  495     offset += clear_avx_size();
  496   }
  497   return offset;
  498 }
  499 //
  500 // Compute padding required for nodes which need alignment
  501 //
  502 
  503 // The address of the call instruction needs to be 4-byte aligned to
  504 // ensure that it does not span a cache line so that it can be patched.
  505 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  506 {
  507   current_offset += clear_avx_size(); // skip vzeroupper
  508   current_offset += 1; // skip call opcode byte
  509   return align_up(current_offset, alignment_required()) - current_offset;
  510 }
  511 
  512 // The address of the call instruction needs to be 4-byte aligned to
  513 // ensure that it does not span a cache line so that it can be patched.
  514 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  515 {
  516   current_offset += clear_avx_size(); // skip vzeroupper
  517   current_offset += 11; // skip movq instruction + call opcode byte
  518   return align_up(current_offset, alignment_required()) - current_offset;
  519 }
  520 
  521 // EMIT_RM()
  522 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  523   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
  524   cbuf.insts()->emit_int8(c);
  525 }
  526 
  527 // EMIT_CC()
  528 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  529   unsigned char c = (unsigned char) (f1 | f2);
  530   cbuf.insts()->emit_int8(c);
  531 }
  532 
  533 // EMIT_OPCODE()
  534 void emit_opcode(CodeBuffer &cbuf, int code) {
  535   cbuf.insts()->emit_int8((unsigned char) code);
  536 }
  537 
  538 // EMIT_OPCODE() w/ relocation information
  539 void emit_opcode(CodeBuffer &cbuf,
  540                  int code, relocInfo::relocType reloc, int offset, int format)
  541 {
  542   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
  543   emit_opcode(cbuf, code);
  544 }
  545 
  546 // EMIT_D8()
  547 void emit_d8(CodeBuffer &cbuf, int d8) {
  548   cbuf.insts()->emit_int8((unsigned char) d8);
  549 }
  550 
  551 // EMIT_D16()
  552 void emit_d16(CodeBuffer &cbuf, int d16) {
  553   cbuf.insts()->emit_int16(d16);
  554 }
  555 
  556 // EMIT_D32()
  557 void emit_d32(CodeBuffer &cbuf, int d32) {
  558   cbuf.insts()->emit_int32(d32);
  559 }
  560 
  561 // EMIT_D64()
  562 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
  563   cbuf.insts()->emit_int64(d64);
  564 }
  565 
  566 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  567 void emit_d32_reloc(CodeBuffer& cbuf,
  568                     int d32,
  569                     relocInfo::relocType reloc,
  570                     int format)
  571 {
  572   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
  573   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  574   cbuf.insts()->emit_int32(d32);
  575 }
  576 
  577 // emit 32 bit value and construct relocation entry from RelocationHolder
  578 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
  579 #ifdef ASSERT
  580   if (rspec.reloc()->type() == relocInfo::oop_type &&
  581       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
  582     assert(Universe::heap()->is_in((address)(intptr_t)d32), "should be real oop");
  583     assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)), "cannot embed broken oops in code");
  584   }
  585 #endif
  586   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  587   cbuf.insts()->emit_int32(d32);
  588 }
  589 
  590 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
  591   address next_ip = cbuf.insts_end() + 4;
  592   emit_d32_reloc(cbuf, (int) (addr - next_ip),
  593                  external_word_Relocation::spec(addr),
  594                  RELOC_DISP32);
  595 }
  596 
  597 
  598 // emit 64 bit value and construct relocation entry from relocInfo::relocType
  599 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
  600   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  601   cbuf.insts()->emit_int64(d64);
  602 }
  603 
  604 // emit 64 bit value and construct relocation entry from RelocationHolder
  605 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
  606 #ifdef ASSERT
  607   if (rspec.reloc()->type() == relocInfo::oop_type &&
  608       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
  609     assert(Universe::heap()->is_in((address)d64), "should be real oop");
  610     assert(oopDesc::is_oop(cast_to_oop(d64)), "cannot embed broken oops in code");
  611   }
  612 #endif
  613   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  614   cbuf.insts()->emit_int64(d64);
  615 }
  616 
  617 // Access stack slot for load or store
  618 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
  619 {
  620   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
  621   if (-0x80 <= disp && disp < 0x80) {
  622     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
  623     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  624     emit_d8(cbuf, disp);     // Displacement  // R/M byte
  625   } else {
  626     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
  627     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  628     emit_d32(cbuf, disp);     // Displacement // R/M byte
  629   }
  630 }
  631 
  632    // rRegI ereg, memory mem) %{    // emit_reg_mem
  633 void encode_RegMem(CodeBuffer &cbuf,
  634                    int reg,
  635                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
  636 {
  637   assert(disp_reloc == relocInfo::none, "cannot have disp");
  638   int regenc = reg & 7;
  639   int baseenc = base & 7;
  640   int indexenc = index & 7;
  641 
  642   // There is no index & no scale, use form without SIB byte
  643   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
  644     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  645     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  646       emit_rm(cbuf, 0x0, regenc, baseenc); // *
  647     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  648       // If 8-bit displacement, mode 0x1
  649       emit_rm(cbuf, 0x1, regenc, baseenc); // *
  650       emit_d8(cbuf, disp);
  651     } else {
  652       // If 32-bit displacement
  653       if (base == -1) { // Special flag for absolute address
  654         emit_rm(cbuf, 0x0, regenc, 0x5); // *
  655         if (disp_reloc != relocInfo::none) {
  656           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  657         } else {
  658           emit_d32(cbuf, disp);
  659         }
  660       } else {
  661         // Normal base + offset
  662         emit_rm(cbuf, 0x2, regenc, baseenc); // *
  663         if (disp_reloc != relocInfo::none) {
  664           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  665         } else {
  666           emit_d32(cbuf, disp);
  667         }
  668       }
  669     }
  670   } else {
  671     // Else, encode with the SIB byte
  672     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  673     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  674       // If no displacement
  675       emit_rm(cbuf, 0x0, regenc, 0x4); // *
  676       emit_rm(cbuf, scale, indexenc, baseenc);
  677     } else {
  678       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  679         // If 8-bit displacement, mode 0x1
  680         emit_rm(cbuf, 0x1, regenc, 0x4); // *
  681         emit_rm(cbuf, scale, indexenc, baseenc);
  682         emit_d8(cbuf, disp);
  683       } else {
  684         // If 32-bit displacement
  685         if (base == 0x04 ) {
  686           emit_rm(cbuf, 0x2, regenc, 0x4);
  687           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
  688         } else {
  689           emit_rm(cbuf, 0x2, regenc, 0x4);
  690           emit_rm(cbuf, scale, indexenc, baseenc); // *
  691         }
  692         if (disp_reloc != relocInfo::none) {
  693           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  694         } else {
  695           emit_d32(cbuf, disp);
  696         }
  697       }
  698     }
  699   }
  700 }
  701 
  702 // This could be in MacroAssembler but it's fairly C2 specific
  703 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  704   Label exit;
  705   __ jccb(Assembler::noParity, exit);
  706   __ pushf();
  707   //
  708   // comiss/ucomiss instructions set ZF,PF,CF flags and
  709   // zero OF,AF,SF for NaN values.
  710   // Fixup flags by zeroing ZF,PF so that compare of NaN
  711   // values returns 'less than' result (CF is set).
  712   // Leave the rest of flags unchanged.
  713   //
  714   //    7 6 5 4 3 2 1 0
  715   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  716   //    0 0 1 0 1 0 1 1   (0x2B)
  717   //
  718   __ andq(Address(rsp, 0), 0xffffff2b);
  719   __ popf();
  720   __ bind(exit);
  721 }
  722 
  723 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  724   Label done;
  725   __ movl(dst, -1);
  726   __ jcc(Assembler::parity, done);
  727   __ jcc(Assembler::below, done);
  728   __ setb(Assembler::notEqual, dst);
  729   __ movzbl(dst, dst);
  730   __ bind(done);
  731 }
  732 
  733 // Math.min()    # Math.max()
  734 // --------------------------
  735 // ucomis[s/d]   #
  736 // ja   -> b     # a
  737 // jp   -> NaN   # NaN
  738 // jb   -> a     # b
  739 // je            #
  740 // |-jz -> a | b # a & b
  741 // |    -> a     #
  742 void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
  743                      XMMRegister a, XMMRegister b,
  744                      XMMRegister xmmt, Register rt,
  745                      bool min, bool single) {
  746 
  747   Label nan, zero, below, above, done;
  748 
  749   if (single)
  750     __ ucomiss(a, b);
  751   else
  752     __ ucomisd(a, b);
  753 
  754   if (dst->encoding() != (min ? b : a)->encoding())
  755     __ jccb(Assembler::above, above); // CF=0 & ZF=0
  756   else
  757     __ jccb(Assembler::above, done);
  758 
  759   __ jccb(Assembler::parity, nan);  // PF=1
  760   __ jccb(Assembler::below, below); // CF=1
  761 
  762   // equal
  763   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  764   if (single) {
  765     __ ucomiss(a, xmmt);
  766     __ jccb(Assembler::equal, zero);
  767 
  768     __ movflt(dst, a);
  769     __ jmp(done);
  770   }
  771   else {
  772     __ ucomisd(a, xmmt);
  773     __ jccb(Assembler::equal, zero);
  774 
  775     __ movdbl(dst, a);
  776     __ jmp(done);
  777   }
  778 
  779   __ bind(zero);
  780   if (min)
  781     __ vpor(dst, a, b, Assembler::AVX_128bit);
  782   else
  783     __ vpand(dst, a, b, Assembler::AVX_128bit);
  784 
  785   __ jmp(done);
  786 
  787   __ bind(above);
  788   if (single)
  789     __ movflt(dst, min ? b : a);
  790   else
  791     __ movdbl(dst, min ? b : a);
  792 
  793   __ jmp(done);
  794 
  795   __ bind(nan);
  796   if (single) {
  797     __ movl(rt, 0x7fc00000); // Float.NaN
  798     __ movdl(dst, rt);
  799   }
  800   else {
  801     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
  802     __ movdq(dst, rt);
  803   }
  804   __ jmp(done);
  805 
  806   __ bind(below);
  807   if (single)
  808     __ movflt(dst, min ? a : b);
  809   else
  810     __ movdbl(dst, min ? a : b);
  811 
  812   __ bind(done);
  813 }
  814 
  815 //=============================================================================
// The constant base node's output mask is bound to the empty register mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  817 
  818 int ConstantTable::calculate_table_base_offset() const {
  819   return 0;  // absolute addressing, no offset
  820 }
  821 
  822 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  823 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  824   ShouldNotReachHere();
  825 }
  826 
  827 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  828   // Empty encoding
  829 }
  830 
  831 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  832   return 0;
  833 }
  834 
  835 #ifndef PRODUCT
  836 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  837   st->print("# MachConstantBaseNode (empty encoding)");
  838 }
  839 #endif
  840 
  841 
  842 //=============================================================================
  843 #ifndef PRODUCT
  844 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  845   Compile* C = ra_->C;
  846 
  847   int framesize = C->output()->frame_size_in_bytes();
  848   int bangsize = C->output()->bang_size_in_bytes();
  849   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  850   // Remove wordSize for return addr which is already pushed.
  851   framesize -= wordSize;
  852 
  853   if (C->output()->need_stack_bang(bangsize)) {
  854     framesize -= wordSize;
  855     st->print("# stack bang (%d bytes)", bangsize);
  856     st->print("\n\t");
  857     st->print("pushq   rbp\t# Save rbp");
  858     if (PreserveFramePointer) {
  859         st->print("\n\t");
  860         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  861     }
  862     if (framesize) {
  863       st->print("\n\t");
  864       st->print("subq    rsp, #%d\t# Create frame",framesize);
  865     }
  866   } else {
  867     st->print("subq    rsp, #%d\t# Create frame",framesize);
  868     st->print("\n\t");
  869     framesize -= wordSize;
  870     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  871     if (PreserveFramePointer) {
  872       st->print("\n\t");
  873       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  874       if (framesize > 0) {
  875         st->print("\n\t");
  876         st->print("addq    rbp, #%d", framesize);
  877       }
  878     }
  879   }
  880 
  881   if (VerifyStackAtCalls) {
  882     st->print("\n\t");
  883     framesize -= wordSize;
  884     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  885 #ifdef ASSERT
  886     st->print("\n\t");
  887     st->print("# stack alignment check");
  888 #endif
  889   }
  890   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  891     st->print("\n\t");
  892     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  893     st->print("\n\t");
  894     st->print("je      fast_entry\t");
  895     st->print("\n\t");
  896     st->print("call    #nmethod_entry_barrier_stub\t");
  897     st->print("\n\tfast_entry:");
  898   }
  899   st->cr();
  900 }
  901 #endif
  902 
  903 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  904   Compile* C = ra_->C;
  905   C2_MacroAssembler _masm(&cbuf);
  906 
  907   int framesize = C->output()->frame_size_in_bytes();
  908   int bangsize = C->output()->bang_size_in_bytes();
  909 
  910   if (C->clinit_barrier_on_entry()) {
  911     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  912     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  913 
  914     Label L_skip_barrier;
  915     Register klass = rscratch1;
  916 
  917     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  918     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  919 
  920     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  921 
  922     __ bind(L_skip_barrier);
  923   }
  924 
  925   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);
  926 
  927   C->output()->set_frame_complete(cbuf.insts_size());
  928 
  929   if (C->has_mach_constant_base_node()) {
  930     // NOTE: We set the table base offset here because users might be
  931     // emitted before MachConstantBaseNode.
  932     ConstantTable& constant_table = C->output()->constant_table();
  933     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  934   }
  935 }
  936 
  937 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  938 {
  939   return MachNode::size(ra_); // too many variables; just compute it
  940                               // the hard way
  941 }
  942 
  943 int MachPrologNode::reloc() const
  944 {
  945   return 0; // a large enough number
  946 }
  947 
  948 //=============================================================================
  949 #ifndef PRODUCT
  950 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  951 {
  952   Compile* C = ra_->C;
  953   if (generate_vzeroupper(C)) {
  954     st->print("vzeroupper");
  955     st->cr(); st->print("\t");
  956   }
  957 
  958   int framesize = C->output()->frame_size_in_bytes();
  959   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  960   // Remove word for return adr already pushed
  961   // and RBP
  962   framesize -= 2*wordSize;
  963 
  964   if (framesize) {
  965     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
  966     st->print("\t");
  967   }
  968 
  969   st->print_cr("popq    rbp");
  970   if (do_polling() && C->is_method_compilation()) {
  971     st->print("\t");
  972     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  973                  "ja      #safepoint_stub\t"
  974                  "# Safepoint: poll for GC");
  975   }
  976 }
  977 #endif
  978 
  979 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  980 {
  981   Compile* C = ra_->C;
  982   MacroAssembler _masm(&cbuf);
  983 
  984   if (generate_vzeroupper(C)) {
  985     // Clear upper bits of YMM registers when current compiled code uses
  986     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  987     __ vzeroupper();
  988   }
  989 
  990   int framesize = C->output()->frame_size_in_bytes();
  991   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  992   // Remove word for return adr already pushed
  993   // and RBP
  994   framesize -= 2*wordSize;
  995 
  996   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  997 
  998   if (framesize) {
  999     emit_opcode(cbuf, Assembler::REX_W);
 1000     if (framesize < 0x80) {
 1001       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 1002       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1003       emit_d8(cbuf, framesize);
 1004     } else {
 1005       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 1006       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1007       emit_d32(cbuf, framesize);
 1008     }
 1009   }
 1010 
 1011   // popq rbp
 1012   emit_opcode(cbuf, 0x58 | RBP_enc);
 1013 
 1014   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1015     __ reserved_stack_check();
 1016   }
 1017 
 1018   if (do_polling() && C->is_method_compilation()) {
 1019     MacroAssembler _masm(&cbuf);
 1020     Label dummy_label;
 1021     Label* code_stub = &dummy_label;
 1022     if (!C->output()->in_scratch_emit_size()) {
 1023       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1024       C->output()->add_stub(stub);
 1025       code_stub = &stub->entry();
 1026     }
 1027     __ relocate(relocInfo::poll_return_type);
 1028     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
 1029   }
 1030 }
 1031 
 1032 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1033 {
 1034   return MachNode::size(ra_); // too many variables; just compute it
 1035                               // the hard way
 1036 }
 1037 
 1038 int MachEpilogNode::reloc() const
 1039 {
 1040   return 2; // a large enough number
 1041 }
 1042 
 1043 const Pipeline* MachEpilogNode::pipeline() const
 1044 {
 1045   return MachNode::pipeline_class();
 1046 }
 1047 
 1048 //=============================================================================
 1049 
 1050 enum RC {
 1051   rc_bad,
 1052   rc_int,
 1053   rc_kreg,
 1054   rc_float,
 1055   rc_stack
 1056 };
 1057 
 1058 static enum RC rc_class(OptoReg::Name reg)
 1059 {
 1060   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 1061 
 1062   if (OptoReg::is_stack(reg)) return rc_stack;
 1063 
 1064   VMReg r = OptoReg::as_VMReg(reg);
 1065 
 1066   if (r->is_Register()) return rc_int;
 1067 
 1068   if (r->is_KRegister()) return rc_kreg;
 1069 
 1070   assert(r->is_XMMRegister(), "must be");
 1071   return rc_float;
 1072 }
 1073 
// Forward declarations of the next two helpers, which are shared by the
// 32- and 64-bit VM. They are defined in x86.ad.
// Both are called with a NULL cbuf from the format path of
// MachSpillCopyNode::implementation(), with st as the listing stream.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
 1080 
 1081 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
 1082                                       int dst_offset, uint ireg, outputStream* st) {
 1083   if (cbuf) {
 1084     MacroAssembler _masm(cbuf);
 1085     switch (ireg) {
 1086     case Op_VecS:
 1087       __ movq(Address(rsp, -8), rax);
 1088       __ movl(rax, Address(rsp, src_offset));
 1089       __ movl(Address(rsp, dst_offset), rax);
 1090       __ movq(rax, Address(rsp, -8));
 1091       break;
 1092     case Op_VecD:
 1093       __ pushq(Address(rsp, src_offset));
 1094       __ popq (Address(rsp, dst_offset));
 1095       break;
 1096     case Op_VecX:
 1097       __ pushq(Address(rsp, src_offset));
 1098       __ popq (Address(rsp, dst_offset));
 1099       __ pushq(Address(rsp, src_offset+8));
 1100       __ popq (Address(rsp, dst_offset+8));
 1101       break;
 1102     case Op_VecY:
 1103       __ vmovdqu(Address(rsp, -32), xmm0);
 1104       __ vmovdqu(xmm0, Address(rsp, src_offset));
 1105       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 1106       __ vmovdqu(xmm0, Address(rsp, -32));
 1107       break;
 1108     case Op_VecZ:
 1109       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 1110       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 1111       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 1112       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 1113       break;
 1114     default:
 1115       ShouldNotReachHere();
 1116     }
 1117 #ifndef PRODUCT
 1118   } else {
 1119     switch (ireg) {
 1120     case Op_VecS:
 1121       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 1122                 "movl    rax, [rsp + #%d]\n\t"
 1123                 "movl    [rsp + #%d], rax\n\t"
 1124                 "movq    rax, [rsp - #8]",
 1125                 src_offset, dst_offset);
 1126       break;
 1127     case Op_VecD:
 1128       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1129                 "popq    [rsp + #%d]",
 1130                 src_offset, dst_offset);
 1131       break;
 1132      case Op_VecX:
 1133       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 1134                 "popq    [rsp + #%d]\n\t"
 1135                 "pushq   [rsp + #%d]\n\t"
 1136                 "popq    [rsp + #%d]",
 1137                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 1138       break;
 1139     case Op_VecY:
 1140       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1141                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1142                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1143                 "vmovdqu xmm0, [rsp - #32]",
 1144                 src_offset, dst_offset);
 1145       break;
 1146     case Op_VecZ:
 1147       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1148                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1149                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1150                 "vmovdqu xmm0, [rsp - #64]",
 1151                 src_offset, dst_offset);
 1152       break;
 1153     default:
 1154       ShouldNotReachHere();
 1155     }
 1156 #endif
 1157   }
 1158 }
 1159 
// Shared worker behind MachSpillCopyNode::emit() and MachSpillCopyNode::format().
//
// Looks up the allocated locations of the source (in(1)) and destination
// (this), classifies them with rc_class(), and then handles the copy for the
// resulting (source class, destination class) pair:
//   - cbuf != NULL: the move is emitted into cbuf;
//   - cbuf == NULL: in non-PRODUCT builds the move is printed to st.
//
// A copy is treated as 64-bit when both the source and the destination are an
// aligned, adjacent first/second pair; otherwise it is treated as 32-bit.
//
//   cbuf     code buffer, or NULL when only formatting
//   ra_      register allocator used to look up locations and stack offsets
//   do_size  not read anywhere in this function
//   st       listing stream, or NULL when emitting
//
// Every return statement in this function returns 0.
uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(cbuf != NULL || st  != NULL, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector values (but not vector masks) are delegated to the vector helpers.
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      // xmm -> xmm
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      // xmm -> mem (is_load == false)
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      // mem -> xmm (is_load == true)
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq    [rsp + #%d]",
                     src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // go through rax, saving its old value below rsp and restoring it after.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl    rax, [rsp + #%d]\n\t"
                    "movl    [rsp + #%d], rax\n\t"
                    "movq    rax, [rsp - #8]",
                     src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, [rsp + #%d]\t# spill",
                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // mem -> kreg
      // Only the 64-bit pair case is handled; otherwise nothing is emitted.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
  #ifndef PRODUCT
        } else {
           st->print("kmovq   %s, %s\t# spill",
                       Matcher::regName[dst_first],
                       Matcher::regName[src_first]);
  #endif
        }
      }
      // NOTE(review): Unimplemented() is reached on every gpr -> kreg copy,
      // including after the 64-bit kmov above - confirm this is intended.
      Unimplemented();
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // xmm -> kreg is rejected
      assert(false, "Illegal spilling");
      return 0;
    }
  } else if (src_first_rc == rc_kreg) {
    // kreg ->
    if (dst_first_rc == rc_stack) {
      // kreg -> mem
      // Only the 64-bit pair case is handled; otherwise nothing is emitted.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   [rsp + #%d] , %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // kreg -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): Unimplemented() is reached on every kreg -> gpr copy,
      // including after the 64-bit kmov above - confirm this is intended.
      Unimplemented();
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // kreg -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // kreg -> xmm is rejected
      assert(false, "Illegal spill");
      return 0;
    }
  }

  // No (source class, destination class) pair above matched.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
 1619 
 1620 #ifndef PRODUCT
 1621 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1622   implementation(NULL, ra_, false, st);
 1623 }
 1624 #endif
 1625 
 1626 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1627   implementation(&cbuf, ra_, false, NULL);
 1628 }
 1629 
 1630 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1631   return MachNode::size(ra_);
 1632 }
 1633 
 1634 //=============================================================================
 1635 #ifndef PRODUCT
 1636 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1637 {
 1638   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1639   int reg = ra_->get_reg_first(this);
 1640   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1641             Matcher::regName[reg], offset);
 1642 }
 1643 #endif
 1644 
 1645 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1646 {
 1647   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1648   int reg = ra_->get_encode(this);
 1649   if (offset >= 0x80) {
 1650     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1651     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1652     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1653     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1654     emit_d32(cbuf, offset);
 1655   } else {
 1656     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1657     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1658     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1659     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1660     emit_d8(cbuf, offset);
 1661   }
 1662 }
 1663 
 1664 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1665 {
 1666   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1667   return (offset < 0x80) ? 5 : 8; // REX
 1668 }
 1669 
 1670 //=============================================================================
 1671 #ifndef PRODUCT
 1672 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1673 {
 1674   if (UseCompressedClassPointers) {
 1675     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1676     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1677     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1678   } else {
 1679     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1680                  "# Inline cache check");
 1681   }
 1682   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1683   st->print_cr("\tnop\t# nops to align entry point");
 1684 }
 1685 #endif
 1686 
 1687 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1688 {
 1689   MacroAssembler masm(&cbuf);
 1690   uint insts_size = cbuf.insts_size();
 1691   if (UseCompressedClassPointers) {
 1692     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1693     masm.cmpptr(rax, rscratch1);
 1694   } else {
 1695     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1696   }
 1697 
 1698   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1699 
 1700   /* WARNING these NOPs are critical so that verified entry point is properly
 1701      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1702   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1703   if (OptoBreakpoint) {
 1704     // Leave space for int3
 1705     nops_cnt -= 1;
 1706   }
 1707   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1708   if (nops_cnt > 0)
 1709     masm.nop(nops_cnt);
 1710 }
 1711 
 1712 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1713 {
 1714   return MachNode::size(ra_); // too many variables; just compute it
 1715                               // the hard way
 1716 }
 1717 
 1718 
 1719 //=============================================================================
 1720 
 1721 const bool Matcher::supports_vector_calling_convention(void) {
 1722   if (EnableVectorSupport && UseVectorStubs) {
 1723     return true;
 1724   }
 1725   return false;
 1726 }
 1727 
 1728 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1729   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1730   int lo = XMM0_num;
 1731   int hi = XMM0b_num;
 1732   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1733   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1734   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1735   return OptoRegPair(hi, lo);
 1736 }
 1737 
 1738 // Is this branch offset short enough that a short branch can be used?
 1739 //
 1740 // NOTE: If the platform does not provide any short branch variants, then
 1741 //       this method should return false for offset 0.
 1742 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1743   // The passed offset is relative to address of the branch.
 1744   // On 86 a branch displacement is calculated relative to address
 1745   // of a next instruction.
 1746   offset -= br_size;
 1747 
 1748   // the short version of jmpConUCF2 contains multiple branches,
 1749   // making the reach slightly less
 1750   if (rule == jmpConUCF2_rule)
 1751     return (-126 <= offset && offset <= 125);
 1752   return (-128 <= offset && offset <= 127);
 1753 }
 1754 
 1755 // Return whether or not this register is ever used as an argument.
 1756 // This function is used on startup to build the trampoline stubs in
 1757 // generateOptoStub.  Registers not mentioned will be killed by the VM
 1758 // call in the trampoline, and arguments in those registers not be
 1759 // available to the callee.
 1760 bool Matcher::can_be_java_arg(int reg)
 1761 {
 1762   return
 1763     reg ==  RDI_num || reg == RDI_H_num ||
 1764     reg ==  RSI_num || reg == RSI_H_num ||
 1765     reg ==  RDX_num || reg == RDX_H_num ||
 1766     reg ==  RCX_num || reg == RCX_H_num ||
 1767     reg ==   R8_num || reg ==  R8_H_num ||
 1768     reg ==   R9_num || reg ==  R9_H_num ||
 1769     reg ==  R12_num || reg == R12_H_num ||
 1770     reg == XMM0_num || reg == XMM0b_num ||
 1771     reg == XMM1_num || reg == XMM1b_num ||
 1772     reg == XMM2_num || reg == XMM2b_num ||
 1773     reg == XMM3_num || reg == XMM3b_num ||
 1774     reg == XMM4_num || reg == XMM4b_num ||
 1775     reg == XMM5_num || reg == XMM5b_num ||
 1776     reg == XMM6_num || reg == XMM6b_num ||
 1777     reg == XMM7_num || reg == XMM7b_num;
 1778 }
 1779 
 1780 bool Matcher::is_spillable_arg(int reg)
 1781 {
 1782   return can_be_java_arg(reg);
 1783 }
 1784 
 1785 uint Matcher::int_pressure_limit()
 1786 {
 1787   return (INTPRESSURE == -1) ? _INT_REG_mask.Size() : INTPRESSURE;
 1788 }
 1789 
 1790 uint Matcher::float_pressure_limit()
 1791 {
 1792   // After experiment around with different values, the following default threshold
 1793   // works best for LCM's register pressure scheduling on x64.
 1794   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 1795   uint default_float_pressure_threshold = _FLOAT_REG_mask.Size() - dec_count;
 1796   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 1797 }
 1798 
 1799 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1800   // In 64 bit mode a code which use multiply when
 1801   // devisor is constant is faster than hardware
 1802   // DIV instruction (it uses MulHiL).
 1803   return false;
 1804 }
 1805 
 1806 // Register for DIVI projection of divmodI
 1807 RegMask Matcher::divI_proj_mask() {
 1808   return INT_RAX_REG_mask();
 1809 }
 1810 
 1811 // Register for MODI projection of divmodI
 1812 RegMask Matcher::modI_proj_mask() {
 1813   return INT_RDX_REG_mask();
 1814 }
 1815 
 1816 // Register for DIVL projection of divmodL
 1817 RegMask Matcher::divL_proj_mask() {
 1818   return LONG_RAX_REG_mask();
 1819 }
 1820 
 1821 // Register for MODL projection of divmodL
 1822 RegMask Matcher::modL_proj_mask() {
 1823   return LONG_RDX_REG_mask();
 1824 }
 1825 
 1826 // Register for saving SP into on method handle invokes. Not used on x86_64.
 1827 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1828     return NO_REG_mask();
 1829 }
 1830 
 1831 %}
 1832 
 1833 //----------ENCODING BLOCK-----------------------------------------------------
 1834 // This block specifies the encoding classes used by the compiler to
 1835 // output byte streams.  Encoding classes are parameterized macros
 1836 // used by Machine Instruction Nodes in order to generate the bit
 1837 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 1841 // which returns its register number when queried.  CONST_INTER causes
 1842 // an operand to generate a function which returns the value of the
 1843 // constant when queried.  MEMORY_INTER causes an operand to generate
 1844 // four functions which return the Base Register, the Index Register,
 1845 // the Scale Value, and the Offset Value of the operand when queried.
 1846 // COND_INTER causes an operand to generate six functions which return
 1847 // the encoding code (ie - encoding bits for the instruction)
 1848 // associated with each basic boolean condition for a conditional
 1849 // instruction.
 1850 //
 1851 // Instructions specify two basic values for encoding.  Again, a
 1852 // function is available to check if the constant displacement is an
 1853 // oop. They use the ins_encode keyword to specify their encoding
 1854 // classes (which must be a sequence of enc_class names, and their
 1855 // parameters, specified in the encoding block), and they use the
 1856 // opcode keyword to specify, in order, their primary, secondary, and
 1857 // tertiary opcode.  Only the opcode sections which a particular
 1858 // instruction needs for encoding need to be specified.
 1859 encode %{
 1860   // Build emit functions for each basic byte or larger field in the
 1861   // intel encoding scheme (opcode, rm, sib, immediate), and call them
 1862   // from C++ code in the enc_class source block.  Emit functions will
 1863   // live in the main source block for now.  In future, we can
 1864   // generalize this by adding a syntax that specifies the sizes of
 1865   // fields in an order, so that the adlc can build the emit functions
 1866   // automagically
 1867 
 1868   // Emit primary opcode
 1869   enc_class OpcP
 1870   %{
 1871     emit_opcode(cbuf, $primary);
 1872   %}
 1873 
 1874   // Emit secondary opcode
 1875   enc_class OpcS
 1876   %{
 1877     emit_opcode(cbuf, $secondary);
 1878   %}
 1879 
 1880   // Emit tertiary opcode
 1881   enc_class OpcT
 1882   %{
 1883     emit_opcode(cbuf, $tertiary);
 1884   %}
 1885 
 1886   // Emit opcode directly
 1887   enc_class Opcode(immI d8)
 1888   %{
 1889     emit_opcode(cbuf, $d8$$constant);
 1890   %}
 1891 
 1892   // Emit size prefix
 1893   enc_class SizePrefix
 1894   %{
 1895     emit_opcode(cbuf, 0x66);
 1896   %}
 1897 
 1898   enc_class reg(rRegI reg)
 1899   %{
 1900     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
 1901   %}
 1902 
 1903   enc_class reg_reg(rRegI dst, rRegI src)
 1904   %{
 1905     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
 1906   %}
 1907 
 1908   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
 1909   %{
 1910     emit_opcode(cbuf, $opcode$$constant);
 1911     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
 1912   %}
 1913 
 1914   enc_class cdql_enc(no_rax_rdx_RegI div)
 1915   %{
 1916     // Full implementation of Java idiv and irem; checks for
 1917     // special case as described in JVM spec., p.243 & p.271.
 1918     //
 1919     //         normal case                           special case
 1920     //
 1921     // input : rax: dividend                         min_int
 1922     //         reg: divisor                          -1
 1923     //
 1924     // output: rax: quotient  (= rax idiv reg)       min_int
 1925     //         rdx: remainder (= rax irem reg)       0
 1926     //
 1927     //  Code sequnce:
 1928     //
 1929     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 1930     //    5:   75 07/08                jne    e <normal>
 1931     //    7:   33 d2                   xor    %edx,%edx
 1932     //  [div >= 8 -> offset + 1]
 1933     //  [REX_B]
 1934     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 1935     //    c:   74 03/04                je     11 <done>
 1936     // 000000000000000e <normal>:
 1937     //    e:   99                      cltd
 1938     //  [div >= 8 -> offset + 1]
 1939     //  [REX_B]
 1940     //    f:   f7 f9                   idiv   $div
 1941     // 0000000000000011 <done>:
 1942     MacroAssembler _masm(&cbuf);
 1943     Label normal;
 1944     Label done;
 1945 
 1946     // cmp    $0x80000000,%eax
 1947     __ cmpl(as_Register(RAX_enc), 0x80000000);
 1948 
 1949     // jne    e <normal>
 1950     __ jccb(Assembler::notEqual, normal);
 1951 
 1952     // xor    %edx,%edx
 1953     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 1954 
 1955     // cmp    $0xffffffffffffffff,%ecx
 1956     __ cmpl($div$$Register, -1);
 1957 
 1958     // je     11 <done>
 1959     __ jccb(Assembler::equal, done);
 1960 
 1961     // <normal>
 1962     // cltd
 1963     __ bind(normal);
 1964     __ cdql();
 1965 
 1966     // idivl
 1967     // <done>
 1968     __ idivl($div$$Register);
 1969     __ bind(done);
 1970   %}
 1971 
 1972   enc_class cdqq_enc(no_rax_rdx_RegL div)
 1973   %{
 1974     // Full implementation of Java ldiv and lrem; checks for
 1975     // special case as described in JVM spec., p.243 & p.271.
 1976     //
 1977     //         normal case                           special case
 1978     //
 1979     // input : rax: dividend                         min_long
 1980     //         reg: divisor                          -1
 1981     //
 1982     // output: rax: quotient  (= rax idiv reg)       min_long
 1983     //         rdx: remainder (= rax irem reg)       0
 1984     //
 1985     //  Code sequnce:
 1986     //
 1987     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 1988     //    7:   00 00 80
 1989     //    a:   48 39 d0                cmp    %rdx,%rax
 1990     //    d:   75 08                   jne    17 <normal>
 1991     //    f:   33 d2                   xor    %edx,%edx
 1992     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 1993     //   15:   74 05                   je     1c <done>
 1994     // 0000000000000017 <normal>:
 1995     //   17:   48 99                   cqto
 1996     //   19:   48 f7 f9                idiv   $div
 1997     // 000000000000001c <done>:
 1998     MacroAssembler _masm(&cbuf);
 1999     Label normal;
 2000     Label done;
 2001 
 2002     // mov    $0x8000000000000000,%rdx
 2003     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 2004 
 2005     // cmp    %rdx,%rax
 2006     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 2007 
 2008     // jne    17 <normal>
 2009     __ jccb(Assembler::notEqual, normal);
 2010 
 2011     // xor    %edx,%edx
 2012     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 2013 
 2014     // cmp    $0xffffffffffffffff,$div
 2015     __ cmpq($div$$Register, -1);
 2016 
 2017     // je     1e <done>
 2018     __ jccb(Assembler::equal, done);
 2019 
 2020     // <normal>
 2021     // cqto
 2022     __ bind(normal);
 2023     __ cdqq();
 2024 
 2025     // idivq (note: must be emitted by the user of this rule)
 2026     // <done>
 2027     __ idivq($div$$Register);
 2028     __ bind(done);
 2029   %}
 2030 
 2031   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
 2032   enc_class OpcSE(immI imm)
 2033   %{
 2034     // Emit primary opcode and set sign-extend bit
 2035     // Check for 8-bit immediate, and set sign extend bit in opcode
 2036     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2037       emit_opcode(cbuf, $primary | 0x02);
 2038     } else {
 2039       // 32-bit immediate
 2040       emit_opcode(cbuf, $primary);
 2041     }
 2042   %}
 2043 
 2044   enc_class OpcSErm(rRegI dst, immI imm)
 2045   %{
 2046     // OpcSEr/m
 2047     int dstenc = $dst$$reg;
 2048     if (dstenc >= 8) {
 2049       emit_opcode(cbuf, Assembler::REX_B);
 2050       dstenc -= 8;
 2051     }
 2052     // Emit primary opcode and set sign-extend bit
 2053     // Check for 8-bit immediate, and set sign extend bit in opcode
 2054     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2055       emit_opcode(cbuf, $primary | 0x02);
 2056     } else {
 2057       // 32-bit immediate
 2058       emit_opcode(cbuf, $primary);
 2059     }
 2060     // Emit r/m byte with secondary opcode, after primary opcode.
 2061     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2062   %}
 2063 
 2064   enc_class OpcSErm_wide(rRegL dst, immI imm)
 2065   %{
 2066     // OpcSEr/m
 2067     int dstenc = $dst$$reg;
 2068     if (dstenc < 8) {
 2069       emit_opcode(cbuf, Assembler::REX_W);
 2070     } else {
 2071       emit_opcode(cbuf, Assembler::REX_WB);
 2072       dstenc -= 8;
 2073     }
 2074     // Emit primary opcode and set sign-extend bit
 2075     // Check for 8-bit immediate, and set sign extend bit in opcode
 2076     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2077       emit_opcode(cbuf, $primary | 0x02);
 2078     } else {
 2079       // 32-bit immediate
 2080       emit_opcode(cbuf, $primary);
 2081     }
 2082     // Emit r/m byte with secondary opcode, after primary opcode.
 2083     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2084   %}
 2085 
 2086   enc_class Con8or32(immI imm)
 2087   %{
 2088     // Check for 8-bit immediate, and set sign extend bit in opcode
 2089     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2090       $$$emit8$imm$$constant;
 2091     } else {
 2092       // 32-bit immediate
 2093       $$$emit32$imm$$constant;
 2094     }
 2095   %}
 2096 
 2097   enc_class opc2_reg(rRegI dst)
 2098   %{
 2099     // BSWAP
 2100     emit_cc(cbuf, $secondary, $dst$$reg);
 2101   %}
 2102 
 2103   enc_class opc3_reg(rRegI dst)
 2104   %{
 2105     // BSWAP
 2106     emit_cc(cbuf, $tertiary, $dst$$reg);
 2107   %}
 2108 
 2109   enc_class reg_opc(rRegI div)
 2110   %{
 2111     // INC, DEC, IDIV, IMOD, JMP indirect, ...
 2112     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
 2113   %}
 2114 
 2115   enc_class enc_cmov(cmpOp cop)
 2116   %{
 2117     // CMOV
 2118     $$$emit8$primary;
 2119     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 2120   %}
 2121 
 2122   enc_class enc_PartialSubtypeCheck()
 2123   %{
 2124     Register Rrdi = as_Register(RDI_enc); // result register
 2125     Register Rrax = as_Register(RAX_enc); // super class
 2126     Register Rrcx = as_Register(RCX_enc); // killed
 2127     Register Rrsi = as_Register(RSI_enc); // sub class
 2128     Label miss;
 2129     const bool set_cond_codes = true;
 2130 
 2131     MacroAssembler _masm(&cbuf);
 2132     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 2133                                      NULL, &miss,
 2134                                      /*set_cond_codes:*/ true);
 2135     if ($primary) {
 2136       __ xorptr(Rrdi, Rrdi);
 2137     }
 2138     __ bind(miss);
 2139   %}
 2140 
 2141   enc_class clear_avx %{
 2142     debug_only(int off0 = cbuf.insts_size());
 2143     if (generate_vzeroupper(Compile::current())) {
 2144       // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
 2145       // Clear upper bits of YMM registers when current compiled code uses
 2146       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 2147       MacroAssembler _masm(&cbuf);
 2148       __ vzeroupper();
 2149     }
 2150     debug_only(int off1 = cbuf.insts_size());
 2151     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 2152   %}
 2153 
 2154   enc_class Java_To_Runtime(method meth) %{
 2155     // No relocation needed
 2156     MacroAssembler _masm(&cbuf);
 2157     __ mov64(r10, (int64_t) $meth$$method);
 2158     __ call(r10);
 2159     __ post_call_nop();
 2160   %}
 2161 
 2162   enc_class Java_Static_Call(method meth)
 2163   %{
 2164     // JAVA STATIC CALL
 2165     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 2166     // determine who we intended to call.
 2167     MacroAssembler _masm(&cbuf);
 2168     cbuf.set_insts_mark();
 2169 
 2170     if (!_method) {
 2171       $$$emit8$primary;
 2172       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
 2173                      runtime_call_Relocation::spec(),
 2174                      RELOC_DISP32);
 2175     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 2176       // The NOP here is purely to ensure that eliding a call to
 2177       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 2178       __ addr_nop_5();
 2179       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 2180     } else {
 2181       $$$emit8$primary;
 2182       int method_index = resolved_method_index(cbuf);
 2183       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 2184                                                   : static_call_Relocation::spec(method_index);
 2185       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
 2186                      rspec, RELOC_DISP32);
 2187       address mark = cbuf.insts_mark();
 2188       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 2189         // Calls of the same statically bound method can share
 2190         // a stub to the interpreter.
 2191         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 2192       } else {
 2193         // Emit stubs for static call.
 2194         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 2195         if (stub == NULL) {
 2196           ciEnv::current()->record_failure("CodeCache is full");
 2197           return;
 2198         }
 2199       }
 2200     }
 2201     _masm.clear_inst_mark();
 2202     __ post_call_nop();
 2203   %}
 2204 
 2205   enc_class Java_Dynamic_Call(method meth) %{
 2206     MacroAssembler _masm(&cbuf);
 2207     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 2208     __ post_call_nop();
 2209   %}
 2210 
 2211   enc_class reg_opc_imm(rRegI dst, immI8 shift)
 2212   %{
 2213     // SAL, SAR, SHR
 2214     int dstenc = $dst$$reg;
 2215     if (dstenc >= 8) {
 2216       emit_opcode(cbuf, Assembler::REX_B);
 2217       dstenc -= 8;
 2218     }
 2219     $$$emit8$primary;
 2220     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2221     $$$emit8$shift$$constant;
 2222   %}
 2223 
 2224   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
 2225   %{
 2226     // SAL, SAR, SHR
 2227     int dstenc = $dst$$reg;
 2228     if (dstenc < 8) {
 2229       emit_opcode(cbuf, Assembler::REX_W);
 2230     } else {
 2231       emit_opcode(cbuf, Assembler::REX_WB);
 2232       dstenc -= 8;
 2233     }
 2234     $$$emit8$primary;
 2235     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2236     $$$emit8$shift$$constant;
 2237   %}
 2238 
 2239   enc_class load_immI(rRegI dst, immI src)
 2240   %{
 2241     int dstenc = $dst$$reg;
 2242     if (dstenc >= 8) {
 2243       emit_opcode(cbuf, Assembler::REX_B);
 2244       dstenc -= 8;
 2245     }
 2246     emit_opcode(cbuf, 0xB8 | dstenc);
 2247     $$$emit32$src$$constant;
 2248   %}
 2249 
 2250   enc_class load_immL(rRegL dst, immL src)
 2251   %{
 2252     int dstenc = $dst$$reg;
 2253     if (dstenc < 8) {
 2254       emit_opcode(cbuf, Assembler::REX_W);
 2255     } else {
 2256       emit_opcode(cbuf, Assembler::REX_WB);
 2257       dstenc -= 8;
 2258     }
 2259     emit_opcode(cbuf, 0xB8 | dstenc);
 2260     emit_d64(cbuf, $src$$constant);
 2261   %}
 2262 
 2263   enc_class load_immUL32(rRegL dst, immUL32 src)
 2264   %{
 2265     // same as load_immI, but this time we care about zeroes in the high word
 2266     int dstenc = $dst$$reg;
 2267     if (dstenc >= 8) {
 2268       emit_opcode(cbuf, Assembler::REX_B);
 2269       dstenc -= 8;
 2270     }
 2271     emit_opcode(cbuf, 0xB8 | dstenc);
 2272     $$$emit32$src$$constant;
 2273   %}
 2274 
 2275   enc_class load_immL32(rRegL dst, immL32 src)
 2276   %{
 2277     int dstenc = $dst$$reg;
 2278     if (dstenc < 8) {
 2279       emit_opcode(cbuf, Assembler::REX_W);
 2280     } else {
 2281       emit_opcode(cbuf, Assembler::REX_WB);
 2282       dstenc -= 8;
 2283     }
 2284     emit_opcode(cbuf, 0xC7);
 2285     emit_rm(cbuf, 0x03, 0x00, dstenc);
 2286     $$$emit32$src$$constant;
 2287   %}
 2288 
 2289   enc_class load_immP31(rRegP dst, immP32 src)
 2290   %{
 2291     // same as load_immI, but this time we care about zeroes in the high word
 2292     int dstenc = $dst$$reg;
 2293     if (dstenc >= 8) {
 2294       emit_opcode(cbuf, Assembler::REX_B);
 2295       dstenc -= 8;
 2296     }
 2297     emit_opcode(cbuf, 0xB8 | dstenc);
 2298     $$$emit32$src$$constant;
 2299   %}
 2300 
 2301   enc_class load_immP(rRegP dst, immP src)
 2302   %{
 2303     int dstenc = $dst$$reg;
 2304     if (dstenc < 8) {
 2305       emit_opcode(cbuf, Assembler::REX_W);
 2306     } else {
 2307       emit_opcode(cbuf, Assembler::REX_WB);
 2308       dstenc -= 8;
 2309     }
 2310     emit_opcode(cbuf, 0xB8 | dstenc);
 2311     // This next line should be generated from ADLC
 2312     if ($src->constant_reloc() != relocInfo::none) {
 2313       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
 2314     } else {
 2315       emit_d64(cbuf, $src$$constant);
 2316     }
 2317   %}
 2318 
 2319   enc_class Con32(immI src)
 2320   %{
 2321     // Output immediate
 2322     $$$emit32$src$$constant;
 2323   %}
 2324 
 2325   enc_class Con32F_as_bits(immF src)
 2326   %{
 2327     // Output Float immediate bits
 2328     jfloat jf = $src$$constant;
 2329     jint jf_as_bits = jint_cast(jf);
 2330     emit_d32(cbuf, jf_as_bits);
 2331   %}
 2332 
 2333   enc_class Con16(immI src)
 2334   %{
 2335     // Output immediate
 2336     $$$emit16$src$$constant;
 2337   %}
 2338 
 2339   // How is this different from Con32??? XXX
 2340   enc_class Con_d32(immI src)
 2341   %{
 2342     emit_d32(cbuf,$src$$constant);
 2343   %}
 2344 
 2345   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
 2346     // Output immediate memory reference
 2347     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2348     emit_d32(cbuf, 0x00);
 2349   %}
 2350 
 2351   enc_class lock_prefix()
 2352   %{
 2353     emit_opcode(cbuf, 0xF0); // lock
 2354   %}
 2355 
 2356   enc_class REX_mem(memory mem)
 2357   %{
 2358     if ($mem$$base >= 8) {
 2359       if ($mem$$index < 8) {
 2360         emit_opcode(cbuf, Assembler::REX_B);
 2361       } else {
 2362         emit_opcode(cbuf, Assembler::REX_XB);
 2363       }
 2364     } else {
 2365       if ($mem$$index >= 8) {
 2366         emit_opcode(cbuf, Assembler::REX_X);
 2367       }
 2368     }
 2369   %}
 2370 
 2371   enc_class REX_mem_wide(memory mem)
 2372   %{
 2373     if ($mem$$base >= 8) {
 2374       if ($mem$$index < 8) {
 2375         emit_opcode(cbuf, Assembler::REX_WB);
 2376       } else {
 2377         emit_opcode(cbuf, Assembler::REX_WXB);
 2378       }
 2379     } else {
 2380       if ($mem$$index < 8) {
 2381         emit_opcode(cbuf, Assembler::REX_W);
 2382       } else {
 2383         emit_opcode(cbuf, Assembler::REX_WX);
 2384       }
 2385     }
 2386   %}
 2387 
 2388   // for byte regs
 2389   enc_class REX_breg(rRegI reg)
 2390   %{
 2391     if ($reg$$reg >= 4) {
 2392       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2393     }
 2394   %}
 2395 
 2396   // for byte regs
 2397   enc_class REX_reg_breg(rRegI dst, rRegI src)
 2398   %{
 2399     if ($dst$$reg < 8) {
 2400       if ($src$$reg >= 4) {
 2401         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2402       }
 2403     } else {
 2404       if ($src$$reg < 8) {
 2405         emit_opcode(cbuf, Assembler::REX_R);
 2406       } else {
 2407         emit_opcode(cbuf, Assembler::REX_RB);
 2408       }
 2409     }
 2410   %}
 2411 
 2412   // for byte regs
 2413   enc_class REX_breg_mem(rRegI reg, memory mem)
 2414   %{
 2415     if ($reg$$reg < 8) {
 2416       if ($mem$$base < 8) {
 2417         if ($mem$$index >= 8) {
 2418           emit_opcode(cbuf, Assembler::REX_X);
 2419         } else if ($reg$$reg >= 4) {
 2420           emit_opcode(cbuf, Assembler::REX);
 2421         }
 2422       } else {
 2423         if ($mem$$index < 8) {
 2424           emit_opcode(cbuf, Assembler::REX_B);
 2425         } else {
 2426           emit_opcode(cbuf, Assembler::REX_XB);
 2427         }
 2428       }
 2429     } else {
 2430       if ($mem$$base < 8) {
 2431         if ($mem$$index < 8) {
 2432           emit_opcode(cbuf, Assembler::REX_R);
 2433         } else {
 2434           emit_opcode(cbuf, Assembler::REX_RX);
 2435         }
 2436       } else {
 2437         if ($mem$$index < 8) {
 2438           emit_opcode(cbuf, Assembler::REX_RB);
 2439         } else {
 2440           emit_opcode(cbuf, Assembler::REX_RXB);
 2441         }
 2442       }
 2443     }
 2444   %}
 2445 
 2446   enc_class REX_reg(rRegI reg)
 2447   %{
 2448     if ($reg$$reg >= 8) {
 2449       emit_opcode(cbuf, Assembler::REX_B);
 2450     }
 2451   %}
 2452 
 2453   enc_class REX_reg_wide(rRegI reg)
 2454   %{
 2455     if ($reg$$reg < 8) {
 2456       emit_opcode(cbuf, Assembler::REX_W);
 2457     } else {
 2458       emit_opcode(cbuf, Assembler::REX_WB);
 2459     }
 2460   %}
 2461 
 2462   enc_class REX_reg_reg(rRegI dst, rRegI src)
 2463   %{
 2464     if ($dst$$reg < 8) {
 2465       if ($src$$reg >= 8) {
 2466         emit_opcode(cbuf, Assembler::REX_B);
 2467       }
 2468     } else {
 2469       if ($src$$reg < 8) {
 2470         emit_opcode(cbuf, Assembler::REX_R);
 2471       } else {
 2472         emit_opcode(cbuf, Assembler::REX_RB);
 2473       }
 2474     }
 2475   %}
 2476 
 2477   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
 2478   %{
 2479     if ($dst$$reg < 8) {
 2480       if ($src$$reg < 8) {
 2481         emit_opcode(cbuf, Assembler::REX_W);
 2482       } else {
 2483         emit_opcode(cbuf, Assembler::REX_WB);
 2484       }
 2485     } else {
 2486       if ($src$$reg < 8) {
 2487         emit_opcode(cbuf, Assembler::REX_WR);
 2488       } else {
 2489         emit_opcode(cbuf, Assembler::REX_WRB);
 2490       }
 2491     }
 2492   %}
 2493 
 2494   enc_class REX_reg_mem(rRegI reg, memory mem)
 2495   %{
          // Emit the REX prefix, if one is needed, for a non-wide instruction
          // with one register and one memory operand.  Three independent
          // tests pick the prefix: $reg >= 8 adds R, the memory index >= 8
          // adds X, and the memory base >= 8 adds B.  If all three encodings
          // are below 8 nothing is emitted.
 2496     if ($reg$$reg < 8) {
 2497       if ($mem$$base < 8) {
 2498         if ($mem$$index >= 8) {
 2499           emit_opcode(cbuf, Assembler::REX_X);
 2500         }
 2501       } else {
 2502         if ($mem$$index < 8) {
 2503           emit_opcode(cbuf, Assembler::REX_B);
 2504         } else {
 2505           emit_opcode(cbuf, Assembler::REX_XB);
 2506         }
 2507       }
 2508     } else {
 2509       if ($mem$$base < 8) {
 2510         if ($mem$$index < 8) {
 2511           emit_opcode(cbuf, Assembler::REX_R);
 2512         } else {
 2513           emit_opcode(cbuf, Assembler::REX_RX);
 2514         }
 2515       } else {
 2516         if ($mem$$index < 8) {
 2517           emit_opcode(cbuf, Assembler::REX_RB);
 2518         } else {
 2519           emit_opcode(cbuf, Assembler::REX_RXB);
 2520         }
 2521       }
 2522     }
 2523   %}
 2524 
 2525   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
 2526   %{
          // Wide counterpart of REX_reg_mem: exactly one prefix byte is always
          // emitted and it always carries W.  R, X and B are added when the
          // register, the memory index and the memory base, respectively, are
          // encoded as 8 or more.
 2527     if ($reg$$reg < 8) {
 2528       if ($mem$$base < 8) {
 2529         if ($mem$$index < 8) {
 2530           emit_opcode(cbuf, Assembler::REX_W);
 2531         } else {
 2532           emit_opcode(cbuf, Assembler::REX_WX);
 2533         }
 2534       } else {
 2535         if ($mem$$index < 8) {
 2536           emit_opcode(cbuf, Assembler::REX_WB);
 2537         } else {
 2538           emit_opcode(cbuf, Assembler::REX_WXB);
 2539         }
 2540       }
 2541     } else {
 2542       if ($mem$$base < 8) {
 2543         if ($mem$$index < 8) {
 2544           emit_opcode(cbuf, Assembler::REX_WR);
 2545         } else {
 2546           emit_opcode(cbuf, Assembler::REX_WRX);
 2547         }
 2548       } else {
 2549         if ($mem$$index < 8) {
 2550           emit_opcode(cbuf, Assembler::REX_WRB);
 2551         } else {
 2552           emit_opcode(cbuf, Assembler::REX_WRXB);
 2553         }
 2554       }
 2555     }
 2556   %}
 2557 
 2558   enc_class reg_mem(rRegI ereg, memory mem)
 2559   %{
          // Collect the register encoding and the pieces of the memory operand
          // (base, index, scale, displacement and the displacement's
          // relocation type) and hand them to encode_RegMem.  No opcode or
          // prefix byte is emitted here.
 2560     // High registers handle in encode_RegMem
 2561     int reg = $ereg$$reg;
 2562     int base = $mem$$base;
 2563     int index = $mem$$index;
 2564     int scale = $mem$$scale;
 2565     int disp = $mem$$disp;
 2566     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2567 
 2568     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
 2569   %}
 2570 
 2571   enc_class RM_opc_mem(immI rm_opcode, memory mem)
 2572   %{
          // Same shape as reg_mem, except that the value passed to
          // encode_RegMem in the register position is the constant
          // $rm_opcode rather than an allocated register's encoding.
 2573     int rm_byte_opcode = $rm_opcode$$constant;
 2574 
 2575     // High registers handle in encode_RegMem
 2576     int base = $mem$$base;
 2577     int index = $mem$$index;
 2578     int scale = $mem$$scale;
 2579     int displace = $mem$$disp;
 2580 
 2581     relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
 2582                                             // working with static
 2583                                             // globals
 2584     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
 2585                   disp_reloc);
 2586   %}
 2587 
 2588   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
 2589   %{
          // Encode the operand bytes for "$dst, [$src0 + $src1]": the base is
          // the register $src0, there is no index and no scale, the
          // displacement is the constant $src1, and no relocation is attached.
          // Only encode_RegMem is called; the opcode and any prefix are not
          // emitted by this class.
 2590     int reg_encoding = $dst$$reg;
 2591     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2592     int index        = 0x04;            // 0x04 indicates no index
 2593     int scale        = 0x00;            // 0x00 indicates no scale
 2594     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2595     relocInfo::relocType disp_reloc = relocInfo::none;
 2596     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
 2597                   disp_reloc);
 2598   %}
 2599 
 2600   enc_class neg_reg(rRegI dst)
 2601   %{
          // Emit a complete non-wide NEG of $dst: an optional REX.B prefix,
          // opcode 0xF7, then the byte built by emit_rm.  For an encoding of 8
          // or more the prefix carries the high bit, so 8 is subtracted to
          // leave the 3-bit value passed to emit_rm.
 2602     int dstenc = $dst$$reg;
 2603     if (dstenc >= 8) {
 2604       emit_opcode(cbuf, Assembler::REX_B);
 2605       dstenc -= 8;
 2606     }
 2607     // NEG $dst
 2608     emit_opcode(cbuf, 0xF7);
          // 0x03 is the opcode extension that selects NEG within the 0xF7
          // group (presumably emit_rm's arguments are mod, reg, r/m).
 2609     emit_rm(cbuf, 0x3, 0x03, dstenc);
 2610   %}
 2611 
 2612   enc_class neg_reg_wide(rRegI dst)
 2613   %{
          // Wide form of neg_reg: a REX prefix with W is always emitted
          // (REX_W, or REX_WB when the encoding is 8 or more), followed by the
          // same 0xF7 opcode and emit_rm byte as the non-wide form.
 2614     int dstenc = $dst$$reg;
 2615     if (dstenc < 8) {
 2616       emit_opcode(cbuf, Assembler::REX_W);
 2617     } else {
 2618       emit_opcode(cbuf, Assembler::REX_WB);
 2619       dstenc -= 8;
 2620     }
 2621     // NEG $dst
 2622     emit_opcode(cbuf, 0xF7);
 2623     emit_rm(cbuf, 0x3, 0x03, dstenc);
 2624   %}
 2625 
 2626   enc_class setLT_reg(rRegI dst)
 2627   %{
          // Emit SETLT (0x0F 0x9C) on the byte register form of $dst.
          // Encodings of 8 and above get REX.B and are reduced by 8.
          // Encodings 4..7 get a plain REX prefix: on x86-64 a byte operand
          // with those encodings only names the low byte of RSP/RBP/RSI/RDI
          // when a REX prefix is present.
 2628     int dstenc = $dst$$reg;
 2629     if (dstenc >= 8) {
 2630       emit_opcode(cbuf, Assembler::REX_B);
 2631       dstenc -= 8;
 2632     } else if (dstenc >= 4) {
 2633       emit_opcode(cbuf, Assembler::REX);
 2634     }
 2635     // SETLT $dst
 2636     emit_opcode(cbuf, 0x0F);
 2637     emit_opcode(cbuf, 0x9C);
 2638     emit_rm(cbuf, 0x3, 0x0, dstenc);
 2639   %}
 2640 
 2641   enc_class setNZ_reg(rRegI dst)
 2642   %{
          // Emit SETNZ (0x0F 0x95) on the byte register form of $dst.  The
          // prefix selection is identical to setLT_reg: REX.B for encodings
          // of 8 and above, a plain REX for 4..7, and none for 0..3.
 2643     int dstenc = $dst$$reg;
 2644     if (dstenc >= 8) {
 2645       emit_opcode(cbuf, Assembler::REX_B);
 2646       dstenc -= 8;
 2647     } else if (dstenc >= 4) {
 2648       emit_opcode(cbuf, Assembler::REX);
 2649     }
 2650     // SETNZ $dst
 2651     emit_opcode(cbuf, 0x0F);
 2652     emit_opcode(cbuf, 0x95);
 2653     emit_rm(cbuf, 0x3, 0x0, dstenc);
 2654   %}
 2655 
 2656 
 2657   // Compare the longs and set -1, 0, or 1 into dst
 2658   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
 2659   %{
          // Emits four instructions back to back:
          //   cmpq   $src1, $src2
          //   movl   $dst, -1
          //   jl,s   done          (skips the two instructions below)
          //   setne  $dst
          //   movzbl $dst, $dst
          // so $dst ends up as -1 when the jl is taken, and otherwise as the
          // zero-extended setne result (0 or 1).
 2660     int src1enc = $src1$$reg;
 2661     int src2enc = $src2$$reg;
 2662     int dstenc = $dst$$reg;
 2663 
 2664     // cmpq $src1, $src2
          // A W-carrying prefix is always emitted; src1 contributes R and
          // src2 contributes B, matching the argument order given to emit_rm.
 2665     if (src1enc < 8) {
 2666       if (src2enc < 8) {
 2667         emit_opcode(cbuf, Assembler::REX_W);
 2668       } else {
 2669         emit_opcode(cbuf, Assembler::REX_WB);
 2670       }
 2671     } else {
 2672       if (src2enc < 8) {
 2673         emit_opcode(cbuf, Assembler::REX_WR);
 2674       } else {
 2675         emit_opcode(cbuf, Assembler::REX_WRB);
 2676       }
 2677     }
 2678     emit_opcode(cbuf, 0x3B);
 2679     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
 2680 
 2681     // movl $dst, -1
          // The low three bits of the register encoding are folded into the
          // 0xB8 opcode byte; the 32-bit immediate follows.
 2682     if (dstenc >= 8) {
 2683       emit_opcode(cbuf, Assembler::REX_B);
 2684     }
 2685     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
 2686     emit_d32(cbuf, -1);
 2687 
 2688     // jl,s done
          // The displacement is the byte length of the setne + movzbl pair
          // below: 3 bytes each, plus one prefix byte each when dstenc >= 4.
 2689     emit_opcode(cbuf, 0x7C);
 2690     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
 2691 
 2692     // setne $dst
 2693     if (dstenc >= 4) {
 2694       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
 2695     }
 2696     emit_opcode(cbuf, 0x0F);
 2697     emit_opcode(cbuf, 0x95);
 2698     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
 2699 
 2700     // movzbl $dst, $dst
          // $dst is both operands of emit_rm here, so an encoding of 8 or
          // more needs both R and B.
 2701     if (dstenc >= 4) {
 2702       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
 2703     }
 2704     emit_opcode(cbuf, 0x0F);
 2705     emit_opcode(cbuf, 0xB6);
 2706     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
 2707   %}
 2708 
 2709   enc_class Push_ResultXD(regD dst) %{
          // Move a double from the x87 stack into the XMM register $dst by
          // bouncing it through memory at [rsp]: fstp_d writes it there,
          // movdbl reads it back, and rsp is then raised by 8.
          // NOTE(review): this class does not itself lower rsp, so the 8-byte
          // slot is presumably reserved earlier (e.g. by Push_SrcXD) - confirm.
 2710     MacroAssembler _masm(&cbuf);
 2711     __ fstp_d(Address(rsp, 0));
 2712     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2713     __ addptr(rsp, 8);
 2714   %}
 2715 
 2716   enc_class Push_SrcXD(regD src) %{
          // Move a double from the XMM register $src onto the x87 stack by
          // bouncing it through memory: rsp is lowered by 8, movdbl stores
          // $src at [rsp], and fld_d loads it from there.  rsp is not restored
          // here, so the 8-byte slot is still allocated when this class ends.
 2717     MacroAssembler _masm(&cbuf);
 2718     __ subptr(rsp, 8);
 2719     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2720     __ fld_d(Address(rsp, 0));
 2721   %}
 2722 
 2723 
 2724   enc_class enc_rethrow()
 2725   %{
          // Emit a 5-byte jump (opcode 0xE9 plus a 32-bit displacement) to
          // OptoRuntime::rethrow_stub(), recorded with a runtime-call
          // relocation.
 2726     cbuf.set_insts_mark();
 2727     emit_opcode(cbuf, 0xE9); // jmp entry
          // insts_end() is read after the opcode byte has been emitted, so
          // subtracting a further 4 makes the displacement relative to the
          // end of the 4-byte displacement field itself.
 2728     emit_d32_reloc(cbuf,
 2729                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
 2730                    runtime_call_Relocation::spec(),
 2731                    RELOC_DISP32);
 2732   %}
 2733 
 2734 %}
 2735 
 2736 
 2737 
 2738 //----------FRAME--------------------------------------------------------------
 2739 // Definition of frame structure and management information.
 2740 //
 2741 //  S T A C K   L A Y O U T    Allocators stack-slot number
 2742 //                             |   (to get allocators register number
 2743 //  G  Owned by    |        |  v    add OptoReg::stack0())
 2744 //  r   CALLER     |        |
 2745 //  o     |        +--------+      pad to even-align allocators stack-slot
 2746 //  w     V        |  pad0  |        numbers; owned by CALLER
 2747 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 2748 //  h     ^        |   in   |  5
 2749 //        |        |  args  |  4   Holes in incoming args owned by SELF
 2750 //  |     |        |        |  3
 2751 //  |     |        +--------+
 2752 //  V     |        | old out|      Empty on Intel, window on Sparc
 2753 //        |    old |preserve|      Must be even aligned.
 2754 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 2755 //        |        |   in   |  3   area for Intel ret address
 2756 //     Owned by    |preserve|      Empty on Sparc.
 2757 //       SELF      +--------+
 2758 //        |        |  pad2  |  2   pad to align old SP
 2759 //        |        +--------+  1
 2760 //        |        | locks  |  0
 2761 //        |        +--------+----> OptoReg::stack0(), even aligned
 2762 //        |        |  pad1  | 11   pad to align new SP
 2763 //        |        +--------+
 2764 //        |        |        | 10
 2765 //        |        | spills |  9   spills
 2766 //        V        |        |  8   (pad0 slot for callee)
 2767 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 2768 //        ^        |  out   |  7
 2769 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 2770 //     Owned by    +--------+
 2771 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 2772 //        |    new |preserve|      Must be even-aligned.
 2773 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 2774 //        |        |        |
 2775 //
 2776 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 2777 //         known from SELF's arguments and the Java calling convention.
 2778 //         Region 6-7 is determined per call site.
 2779 // Note 2: If the calling convention leaves holes in the incoming argument
 2780 //         area, those holes are owned by SELF.  Holes in the outgoing area
 2781 //         are owned by the CALLEE.  Holes should not be necessary in the
 2782 //         incoming area, as the Java calling convention is completely under
 2783 //         the control of the AD file.  Doubles can be sorted and packed to
 2784 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 2785 //         varargs C calling conventions.
 2786 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 2787 //         even aligned with pad0 as needed.
 2788 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 2789 //         region 6-11 is even aligned; it may be padded out more so that
 2790 //         the region from SP to FP meets the minimum stack alignment.
 2791 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 2792 //         alignment.  Region 11, pad1, may be dynamically extended so that
 2793 //         SP meets the minimum alignment.
 2794 
 2795 frame
 2796 %{
        // Frame description for compiled code.  Each statement below sets one
        // property; return_value at the end carries a C++ body.
 2797   // This register defines part of the calling convention
 2798   // between compiled code and the interpreter.
 2799   inline_cache_reg(RAX);                // Inline Cache Register
 2800 
 2801   // Optional: name the operand used by cisc-spilling to access
 2802   // [stack_pointer + offset]
 2803   cisc_spilling_operand_name(indOffset32);
 2804 
 2805   // Number of stack slots consumed by locking an object
 2806   sync_stack_slots(2);
 2807 
 2808   // Compiled code's Frame Pointer
 2809   frame_pointer(RSP);
 2810 
 2811   // Interpreter stores its frame pointer in a register which is
 2812   // stored to the stack by I2CAdaptors.
 2813   // I2CAdaptors convert from interpreted java to compiled java.
 2814   interpreter_frame_pointer(RBP);
 2815 
 2816   // Stack alignment requirement
 2817   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 2818 
 2819   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 2820   // for calls to C.  Supports the var-args backing area for register parms.
        // The byte count is divided by BytesPerInt to express it in slots.
 2821   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 2822 
 2823   // The after-PROLOG location of the return address.  Location of
 2824   // return address specifies a type (REG or STACK) and a number
 2825   // representing the register number (i.e. - use a register name) or
 2826   // stack slot.
 2827   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 2828   // Otherwise, it is above the locks and verification slot and alignment word
        // The in-preserve and fixed slot counts are summed and rounded up to
        // the stack alignment (in slots) before 2 is subtracted.
 2829   return_addr(STACK - 2 +
 2830               align_up((Compile::current()->in_preserve_stack_slots() +
 2831                         Compile::current()->fixed_slots()),
 2832                        stack_alignment_in_slots()));
 2833 
 2834   // Location of compiled Java return values.  Same as C for now.
 2835   return_value
 2836   %{
          // Returns the register pair holding a value of the given ideal
          // register type, by indexing the two tables below with ideal_reg.
 2837     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 2838            "only return normal values");
 2839 
          // Low half of the pair.  The two leading zeros fill the table
          // indices that precede Op_RegN.
 2840     static const int lo[Op_RegL + 1] = {
 2841       0,
 2842       0,
 2843       RAX_num,  // Op_RegN
 2844       RAX_num,  // Op_RegI
 2845       RAX_num,  // Op_RegP
 2846       XMM0_num, // Op_RegF
 2847       XMM0_num, // Op_RegD
 2848       RAX_num   // Op_RegL
 2849     };
          // High half of the pair.  OptoReg::Bad marks the types (N, I, F)
          // that have no high half; P, D and L name one.
 2850     static const int hi[Op_RegL + 1] = {
 2851       0,
 2852       0,
 2853       OptoReg::Bad, // Op_RegN
 2854       OptoReg::Bad, // Op_RegI
 2855       RAX_H_num,    // Op_RegP
 2856       OptoReg::Bad, // Op_RegF
 2857       XMM0b_num,    // Op_RegD
 2858       RAX_H_num     // Op_RegL
 2859     };
 2860     // Excluded flags and vector registers.
 2861     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 2862     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 2863   %}
 2864 %}
 2865 
 2866 //----------ATTRIBUTES---------------------------------------------------------
 2867 //----------Operand Attributes-------------------------------------------------
 2868 op_attrib op_cost(0);        // Required cost attribute
      // Declared with the value 0 here.  Individual operands below state their
      // own op_cost(...) where they want a different value (presumably 0 is
      // what applies to operands that state none - confirm against ADLC).
 2869 
 2870 //----------Instruction Attributes---------------------------------------------
 2871 ins_attrib ins_cost(100);       // Required cost attribute
 2872 ins_attrib ins_size(8);         // Required size attribute (in bits)
 2873 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 2874                                 // a non-matching short branch variant
 2875                                 // of some long branch?
 2876 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 2877                                 // be a power of 2) specifies the
 2878                                 // alignment that some part of the
 2879                                 // instruction (not necessarily the
 2880                                 // start) requires.  If > 1, a
 2881                                 // compute_padding() function must be
 2882                                 // provided for the instruction
      // The four attributes above are declared with the values 100, 8, 0 and 1
      // respectively (presumably the values used by instructions that do not
      // set the attribute themselves - confirm against ADLC).
 2883 
 2884 //----------OPERANDS-----------------------------------------------------------
 2885 // Operand definitions must precede instruction definitions for correct parsing
 2886 // in the ADLC because operands constitute user defined types which are used in
 2887 // instruction definitions.
 2888 
 2889 //----------Simple Operands----------------------------------------------------
 2890 // Immediate Operands
 2891 // Integer Immediate
 2892 operand immI()
 2893 %{
        // Catch-all int immediate: matches every ConI, with no predicate
        // restricting the value.
 2894   match(ConI);
 2895 
 2896   op_cost(10);
 2897   format %{ %}
 2898   interface(CONST_INTER);
 2899 %}
 2900 
 2901 // Constant for test vs zero
 2902 operand immI_0()
 2903 %{
        // ConI whose value is exactly 0.
 2904   predicate(n->get_int() == 0);
 2905   match(ConI);
 2906 
 2907   op_cost(0);
 2908   format %{ %}
 2909   interface(CONST_INTER);
 2910 %}
 2911 
 2912 // Constant for increment
 2913 operand immI_1()
 2914 %{
        // ConI whose value is exactly 1.
 2915   predicate(n->get_int() == 1);
 2916   match(ConI);
 2917 
 2918   op_cost(0);
 2919   format %{ %}
 2920   interface(CONST_INTER);
 2921 %}
 2922 
 2923 // Constant for decrement
 2924 operand immI_M1()
 2925 %{
        // ConI whose value is exactly -1.
 2926   predicate(n->get_int() == -1);
 2927   match(ConI);
 2928 
 2929   op_cost(0);
 2930   format %{ %}
 2931   interface(CONST_INTER);
 2932 %}
 2933 
 2934 operand immI_2()
 2935 %{
        // ConI whose value is exactly 2.
 2936   predicate(n->get_int() == 2);
 2937   match(ConI);
 2938 
 2939   op_cost(0);
 2940   format %{ %}
 2941   interface(CONST_INTER);
 2942 %}
 2943 
 2944 operand immI_4()
 2945 %{
        // ConI whose value is exactly 4.
 2946   predicate(n->get_int() == 4);
 2947   match(ConI);
 2948 
 2949   op_cost(0);
 2950   format %{ %}
 2951   interface(CONST_INTER);
 2952 %}
 2953 
 2954 operand immI_8()
 2955 %{
        // ConI whose value is exactly 8.
 2956   predicate(n->get_int() == 8);
 2957   match(ConI);
 2958 
 2959   op_cost(0);
 2960   format %{ %}
 2961   interface(CONST_INTER);
 2962 %}
 2963 
 2964 // Valid scale values for addressing modes
 2965 operand immI2()
 2966 %{
        // ConI in the inclusive range 0..3.  Unlike its neighbours this
        // operand states no op_cost of its own.
 2967   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 2968   match(ConI);
 2969 
 2970   format %{ %}
 2971   interface(CONST_INTER);
 2972 %}
 2973 
 2974 operand immU7()
 2975 %{
        // ConI in the inclusive range 0..0x7F, i.e. a non-negative value that
        // fits in 7 bits.
 2976   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 2977   match(ConI);
 2978 
 2979   op_cost(5);
 2980   format %{ %}
 2981   interface(CONST_INTER);
 2982 %}
 2983 
 2984 operand immI8()
 2985 %{
        // ConI in the signed 8-bit range: -0x80 inclusive up to 0x80 exclusive.
 2986   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 2987   match(ConI);
 2988 
 2989   op_cost(5);
 2990   format %{ %}
 2991   interface(CONST_INTER);
 2992 %}
 2993 
 2994 operand immU8()
 2995 %{
        // ConI in the unsigned 8-bit range 0..255 inclusive.
 2996   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 2997   match(ConI);
 2998 
 2999   op_cost(5);
 3000   format %{ %}
 3001   interface(CONST_INTER);
 3002 %}
 3003 
 3004 operand immI16()
 3005 %{
        // ConI in the signed 16-bit range -32768..32767 inclusive.
 3006   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3007   match(ConI);
 3008 
 3009   op_cost(10);
 3010   format %{ %}
 3011   interface(CONST_INTER);
 3012 %}
 3013 
 3014 // Int Immediate non-negative
 3015 operand immU31()
 3016 %{
        // ConI that is zero or positive, i.e. any int with the sign bit clear.
 3017   predicate(n->get_int() >= 0);
 3018   match(ConI);
 3019 
 3020   op_cost(0);
 3021   format %{ %}
 3022   interface(CONST_INTER);
 3023 %}
 3024 
 3025 // Constant for long shifts
 3026 operand immI_32()
 3027 %{
        // ConI whose value is exactly 32.
 3028   predicate( n->get_int() == 32 );
 3029   match(ConI);
 3030 
 3031   op_cost(0);
 3032   format %{ %}
 3033   interface(CONST_INTER);
 3034 %}
 3035 
 3036 // Constant for long shifts
 3037 operand immI_64()
 3038 %{
        // ConI whose value is exactly 64.
 3039   predicate( n->get_int() == 64 );
 3040   match(ConI);
 3041 
 3042   op_cost(0);
 3043   format %{ %}
 3044   interface(CONST_INTER);
 3045 %}
 3046 
 3047 // Pointer Immediate
 3048 operand immP()
 3049 %{
        // Catch-all pointer immediate: matches every ConP, with no predicate.
 3050   match(ConP);
 3051 
 3052   op_cost(10);
 3053   format %{ %}
 3054   interface(CONST_INTER);
 3055 %}
 3056 
 3057 // NULL Pointer Immediate
 3058 operand immP0()
 3059 %{
        // ConP whose pointer value (get_ptr()) is 0.
 3060   predicate(n->get_ptr() == 0);
 3061   match(ConP);
 3062 
 3063   op_cost(5);
 3064   format %{ %}
 3065   interface(CONST_INTER);
 3066 %}
 3067 
 3068 // Narrow Pointer Immediate
 3069 operand immN() %{
        // Matches every ConN, with no predicate restricting the value.
 3070   match(ConN);
 3071 
 3072   op_cost(10);
 3073   format %{ %}
 3074   interface(CONST_INTER);
 3075 %}
 3076 
 3077 operand immNKlass() %{
        // Matches every ConNKlass, with no predicate restricting the value.
 3078   match(ConNKlass);
 3079 
 3080   op_cost(10);
 3081   format %{ %}
 3082   interface(CONST_INTER);
 3083 %}
 3084 
 3085 // NULL Narrow Pointer Immediate
 3086 operand immN0() %{
        // ConN whose narrow constant (get_narrowcon()) is 0.
 3087   predicate(n->get_narrowcon() == 0);
 3088   match(ConN);
 3089 
 3090   op_cost(5);
 3091   format %{ %}
 3092   interface(CONST_INTER);
 3093 %}
 3094 
 3095 operand immP31()
 3096 %{
        // ConP that needs no relocation and whose value, shifted right by 31,
        // is zero - i.e. a pointer constant that fits in the low 31 bits.
 3097   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 3098             && (n->get_ptr() >> 31) == 0);
 3099   match(ConP);
 3100 
 3101   op_cost(5);
 3102   format %{ %}
 3103   interface(CONST_INTER);
 3104 %}
 3105 
 3106 
 3107 // Long Immediate
 3108 operand immL()
 3109 %{
        // Catch-all long immediate: matches every ConL, with no predicate.
 3110   match(ConL);
 3111 
 3112   op_cost(20);
 3113   format %{ %}
 3114   interface(CONST_INTER);
 3115 %}
 3116 
 3117 // Long Immediate 8-bit
 3118 operand immL8()
 3119 %{
        // ConL in the signed 8-bit range: -0x80 inclusive up to 0x80 exclusive.
 3120   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 3121   match(ConL);
 3122 
 3123   op_cost(5);
 3124   format %{ %}
 3125   interface(CONST_INTER);
 3126 %}
 3127 
 3128 // Long Immediate 32-bit unsigned
 3129 operand immUL32()
 3130 %{
        // ConL that survives a round trip through unsigned int unchanged,
        // i.e. the upper 32 bits of the long are all zero.
 3131   predicate(n->get_long() == (unsigned int) (n->get_long()));
 3132   match(ConL);
 3133 
 3134   op_cost(10);
 3135   format %{ %}
 3136   interface(CONST_INTER);
 3137 %}
 3138 
 3139 // Long Immediate 32-bit signed
 3140 operand immL32()
 3141 %{
        // ConL that survives a round trip through int unchanged, i.e. the
        // long equals the sign extension of its low 32 bits.
 3142   predicate(n->get_long() == (int) (n->get_long()));
 3143   match(ConL);
 3144 
 3145   op_cost(15);
 3146   format %{ %}
 3147   interface(CONST_INTER);
 3148 %}
 3149 
 3150 operand immL_Pow2()
 3151 %{
        // ConL for which is_power_of_2 holds on the value reinterpreted as
        // an unsigned 64-bit quantity (julong).
 3152   predicate(is_power_of_2((julong)n->get_long()));
 3153   match(ConL);
 3154 
 3155   op_cost(15);
 3156   format %{ %}
 3157   interface(CONST_INTER);
 3158 %}
 3159 
 3160 operand immL_NotPow2()
 3161 %{
        // ConL whose bitwise complement, taken as unsigned 64-bit, satisfies
        // is_power_of_2 (presumably: all bits set except exactly one).
 3162   predicate(is_power_of_2((julong)~n->get_long()));
 3163   match(ConL);
 3164 
 3165   op_cost(15);
 3166   format %{ %}
 3167   interface(CONST_INTER);
 3168 %}
 3169 
 3170 // Long Immediate zero
 3171 operand immL0()
 3172 %{
        // ConL whose value is exactly 0.
 3173   predicate(n->get_long() == 0L);
 3174   match(ConL);
 3175 
 3176   op_cost(10);
 3177   format %{ %}
 3178   interface(CONST_INTER);
 3179 %}
 3180 
 3181 // Constant for increment
 3182 operand immL1()
 3183 %{
        // ConL whose value is exactly 1.  No op_cost is stated here.
 3184   predicate(n->get_long() == 1);
 3185   match(ConL);
 3186 
 3187   format %{ %}
 3188   interface(CONST_INTER);
 3189 %}
 3190 
 3191 // Constant for decrement
 3192 operand immL_M1()
 3193 %{
        // ConL whose value is exactly -1.  No op_cost is stated here.
 3194   predicate(n->get_long() == -1);
 3195   match(ConL);
 3196 
 3197   format %{ %}
 3198   interface(CONST_INTER);
 3199 %}
 3200 
 3201 // Long Immediate: the value 10
 3202 operand immL10()
 3203 %{
        // ConL whose value is exactly 10.  No op_cost is stated here.
 3204   predicate(n->get_long() == 10);
 3205   match(ConL);
 3206 
 3207   format %{ %}
 3208   interface(CONST_INTER);
 3209 %}
 3210 
 3211 // Long immediate from 0 to 127.
 3212 // Used for a shorter form of long mul by 10.
 3213 operand immL_127()
 3214 %{
        // ConL in the range 0 inclusive up to 0x80 exclusive (0..127).
 3215   predicate(0 <= n->get_long() && n->get_long() < 0x80);
 3216   match(ConL);
 3217 
 3218   op_cost(10);
 3219   format %{ %}
 3220   interface(CONST_INTER);
 3221 %}
 3222 
 3223 // Long Immediate: low 32-bit mask
 3224 operand immL_32bits()
 3225 %{
        // ConL whose value is exactly 0xFFFFFFFF: the low 32 bits set and the
        // high 32 bits clear.
 3226   predicate(n->get_long() == 0xFFFFFFFFL);
 3227   match(ConL);
 3228   op_cost(20);
 3229 
 3230   format %{ %}
 3231   interface(CONST_INTER);
 3232 %}
 3233 
 3234 // Int Immediate: 2^n-1, positive
 3235 operand immI_Pow2M1()
 3236 %{
        // ConI that is strictly positive and one less than a power of two.
        // The +1 is done on the value cast to unsigned (juint), so it cannot
        // overflow as a signed int would for the largest positive value.
 3237   predicate((n->get_int() > 0)
 3238             && is_power_of_2((juint)n->get_int() + 1));
 3239   match(ConI);
 3240 
 3241   op_cost(20);
 3242   format %{ %}
 3243   interface(CONST_INTER);
 3244 %}
 3245 
 3246 // Float Immediate zero
 3247 operand immF0()
 3248 %{
        // ConF whose value, passed through jint_cast, compares equal to 0.
        // NOTE(review): jint_cast presumably reinterprets the float's bits,
        // in which case only +0.0f matches and -0.0f does not - confirm.
 3249   predicate(jint_cast(n->getf()) == 0);
 3250   match(ConF);
 3251 
 3252   op_cost(5);
 3253   format %{ %}
 3254   interface(CONST_INTER);
 3255 %}
 3256 
 3257 // Float Immediate
 3258 operand immF()
 3259 %{
        // Catch-all float immediate: matches every ConF, with no predicate.
 3260   match(ConF);
 3261 
 3262   op_cost(15);
 3263   format %{ %}
 3264   interface(CONST_INTER);
 3265 %}
 3266 
 3267 // Double Immediate zero
 3268 operand immD0()
 3269 %{
        // ConD whose value, passed through jlong_cast, compares equal to 0.
        // NOTE(review): jlong_cast presumably reinterprets the double's bits,
        // in which case only +0.0 matches and -0.0 does not - confirm.
 3270   predicate(jlong_cast(n->getd()) == 0);
 3271   match(ConD);
 3272 
 3273   op_cost(5);
 3274   format %{ %}
 3275   interface(CONST_INTER);
 3276 %}
 3277 
 3278 // Double Immediate
 3279 operand immD()
 3280 %{
        // Catch-all double immediate: matches every ConD, with no predicate.
 3281   match(ConD);
 3282 
 3283   op_cost(15);
 3284   format %{ %}
 3285   interface(CONST_INTER);
 3286 %}
 3287 
 3288 // Immediates for special shifts (sign extend)
 3289 
 3290 // Shift-count constants 16 and 24 for the sign-extending shifts
 3291 operand immI_16()
 3292 %{
        // ConI whose value is exactly 16.  No op_cost is stated here.
 3293   predicate(n->get_int() == 16);
 3294   match(ConI);
 3295 
 3296   format %{ %}
 3297   interface(CONST_INTER);
 3298 %}
 3299 
 3300 operand immI_24()
 3301 %{
        // ConI whose value is exactly 24.  No op_cost is stated here.
 3302   predicate(n->get_int() == 24);
 3303   match(ConI);
 3304 
 3305   format %{ %}
 3306   interface(CONST_INTER);
 3307 %}
 3308 
 3309 // Constant for byte-wide masking
 3310 operand immI_255()
 3311 %{
        // ConI whose value is exactly 255 (0xFF, the low eight bits set).
 3312   predicate(n->get_int() == 255);
 3313   match(ConI);
 3314 
 3315   format %{ %}
 3316   interface(CONST_INTER);
 3317 %}
 3318 
 3319 // Constant for short-wide masking
 3320 operand immI_65535()
 3321 %{
        // ConI whose value is exactly 65535 (0xFFFF, the low sixteen bits set).
 3322   predicate(n->get_int() == 65535);
 3323   match(ConI);
 3324 
 3325   format %{ %}
 3326   interface(CONST_INTER);
 3327 %}
 3328 
 3329 // Constant for byte-wide masking
 3330 operand immL_255()
 3331 %{
        // Long counterpart of immI_255: ConL whose value is exactly 255.
 3332   predicate(n->get_long() == 255);
 3333   match(ConL);
 3334 
 3335   format %{ %}
 3336   interface(CONST_INTER);
 3337 %}
 3338 
 3339 // Constant for short-wide masking
 3340 operand immL_65535()
 3341 %{
        // Long counterpart of immI_65535: ConL whose value is exactly 65535.
 3342   predicate(n->get_long() == 65535);
 3343   match(ConL);
 3344 
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 operand kReg()
 3350 %{
        // Vector mask register operand: matches RegVectMask and is allocated
        // from the general register class vectmask_reg.
 3351   constraint(ALLOC_IN_RC(vectmask_reg));
 3352   match(RegVectMask);
 3353   format %{%}
 3354   interface(REG_INTER);
 3355 %}
 3356 
 3357 operand kReg_K1()
 3358 %{
        // RegVectMask operand restricted to register class vectmask_reg_K1.
 3359   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3360   match(RegVectMask);
 3361   format %{%}
 3362   interface(REG_INTER);
 3363 %}
 3364 
 3365 operand kReg_K2()
 3366 %{
        // RegVectMask operand restricted to register class vectmask_reg_K2.
 3367   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3368   match(RegVectMask);
 3369   format %{%}
 3370   interface(REG_INTER);
 3371 %}
 3372 
 3373 // Special Registers
 3374 operand kReg_K3()
 3375 %{
        // RegVectMask operand restricted to register class vectmask_reg_K3.
 3376   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3377   match(RegVectMask);
 3378   format %{%}
 3379   interface(REG_INTER);
 3380 %}
 3381 
 3382 operand kReg_K4()
 3383 %{
        // RegVectMask operand restricted to register class vectmask_reg_K4.
 3384   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3385   match(RegVectMask);
 3386   format %{%}
 3387   interface(REG_INTER);
 3388 %}
 3389 
 3390 operand kReg_K5()
 3391 %{
        // RegVectMask operand restricted to register class vectmask_reg_K5.
 3392   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3393   match(RegVectMask);
 3394   format %{%}
 3395   interface(REG_INTER);
 3396 %}
 3397 
 3398 operand kReg_K6()
 3399 %{
        // RegVectMask operand restricted to register class vectmask_reg_K6.
 3400   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3401   match(RegVectMask);
 3402   format %{%}
 3403   interface(REG_INTER);
 3404 %}
 3405 
 3406 // Special Registers
 3407 operand kReg_K7()
 3408 %{
        // RegVectMask operand restricted to register class vectmask_reg_K7.
 3409   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3410   match(RegVectMask);
 3411   format %{%}
 3412   interface(REG_INTER);
 3413 %}
 3414 
 3415 // Register Operands
 3416 // Integer Register
 3417 operand rRegI()
 3418 %{
        // General int register operand, allocated from class int_reg.
        // Besides the ideal RegI it also matches the five single-register
        // int operands listed below, so a value already held in one of them
        // is accepted wherever rRegI is expected.
 3419   constraint(ALLOC_IN_RC(int_reg));
 3420   match(RegI);
 3421 
 3422   match(rax_RegI);
 3423   match(rbx_RegI);
 3424   match(rcx_RegI);
 3425   match(rdx_RegI);
 3426   match(rdi_RegI);
 3427 
 3428   format %{ %}
 3429   interface(REG_INTER);
 3430 %}
 3431 
 3432 // Special Registers
 3433 operand rax_RegI()
 3434 %{
        // Int operand restricted to register class int_rax_reg; printed as
        // "RAX".  Also matches the general rRegI.
 3435   constraint(ALLOC_IN_RC(int_rax_reg));
 3436   match(RegI);
 3437   match(rRegI);
 3438 
 3439   format %{ "RAX" %}
 3440   interface(REG_INTER);
 3441 %}
 3442 
 3443 // Special Registers
 3444 operand rbx_RegI()
 3445 %{
        // Int operand restricted to register class int_rbx_reg; printed as
        // "RBX".  Also matches the general rRegI.
 3446   constraint(ALLOC_IN_RC(int_rbx_reg));
 3447   match(RegI);
 3448   match(rRegI);
 3449 
 3450   format %{ "RBX" %}
 3451   interface(REG_INTER);
 3452 %}
 3453 
 3454 operand rcx_RegI()
 3455 %{
        // Int operand restricted to register class int_rcx_reg; printed as
        // "RCX".  Also matches the general rRegI.
 3456   constraint(ALLOC_IN_RC(int_rcx_reg));
 3457   match(RegI);
 3458   match(rRegI);
 3459 
 3460   format %{ "RCX" %}
 3461   interface(REG_INTER);
 3462 %}
 3463 
 3464 operand rdx_RegI()
 3465 %{
        // Int operand restricted to register class int_rdx_reg; printed as
        // "RDX".  Also matches the general rRegI.
 3466   constraint(ALLOC_IN_RC(int_rdx_reg));
 3467   match(RegI);
 3468   match(rRegI);
 3469 
 3470   format %{ "RDX" %}
 3471   interface(REG_INTER);
 3472 %}
 3473 
 3474 operand rdi_RegI()
 3475 %{
        // Int operand restricted to register class int_rdi_reg; printed as
        // "RDI".  Also matches the general rRegI.
 3476   constraint(ALLOC_IN_RC(int_rdi_reg));
 3477   match(RegI);
 3478   match(rRegI);
 3479 
 3480   format %{ "RDI" %}
 3481   interface(REG_INTER);
 3482 %}
 3483 
 3484 operand no_rax_rdx_RegI()
 3485 %{
        // Int operand allocated from class int_no_rax_rdx_reg.  Of the
        // single-register int operands it matches only rbx, rcx and rdi;
        // rax_RegI and rdx_RegI are deliberately absent from the list.
 3486   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 3487   match(RegI);
 3488   match(rbx_RegI);
 3489   match(rcx_RegI);
 3490   match(rdi_RegI);
 3491 
 3492   format %{ %}
 3493   interface(REG_INTER);
 3494 %}
 3495 
 3496 operand no_rbp_r13_RegI()
 3497 %{
        // Int operand allocated from class int_no_rbp_r13_reg.  It matches
        // the general rRegI and all five single-register int operands.
 3498   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 3499   match(RegI);
 3500   match(rRegI);
 3501   match(rax_RegI);
 3502   match(rbx_RegI);
 3503   match(rcx_RegI);
 3504   match(rdx_RegI);
 3505   match(rdi_RegI);
 3506 
 3507   format %{ %}
 3508   interface(REG_INTER);
 3509 %}
 3510 
 3511 // Pointer Register
 3512 operand any_RegP()
 3513 %{
        // Pointer operand allocated from class any_reg.  It matches the
        // ideal RegP, every single-register pointer operand listed below,
        // and the general rRegP.
 3514   constraint(ALLOC_IN_RC(any_reg));
 3515   match(RegP);
 3516   match(rax_RegP);
 3517   match(rbx_RegP);
 3518   match(rdi_RegP);
 3519   match(rsi_RegP);
 3520   match(rbp_RegP);
 3521   match(r15_RegP);
 3522   match(rRegP);
 3523 
 3524   format %{ %}
 3525   interface(REG_INTER);
 3526 %}
 3527 
 3528 operand rRegP()
 3529 %{
        // General pointer register operand, allocated from class ptr_reg.
        // The match list is wider than the allocation class: it includes
        // rbp_RegP and r15_RegP so that values in those registers are
        // accepted as inputs.
 3530   constraint(ALLOC_IN_RC(ptr_reg));
 3531   match(RegP);
 3532   match(rax_RegP);
 3533   match(rbx_RegP);
 3534   match(rdi_RegP);
 3535   match(rsi_RegP);
 3536   match(rbp_RegP);  // See Q&A below about
 3537   match(r15_RegP);  // r15_RegP and rbp_RegP.
 3538 
 3539   format %{ %}
 3540   interface(REG_INTER);
 3541 %}
 3542 
 3543 operand rRegN() %{
        // Narrow pointer register operand: matches RegN but is allocated
        // from int_reg, the same register class that rRegI uses.
 3544   constraint(ALLOC_IN_RC(int_reg));
 3545   match(RegN);
 3546 
 3547   format %{ %}
 3548   interface(REG_INTER);
 3549 %}
 3550 
 3551 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 3552 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 3553 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 3554 // The output of an instruction is controlled by the allocator, which respects
 3555 // register class masks, not match rules.  Unless an instruction mentions
 3556 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 3557 // by the allocator as an input.
 3558 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 3559 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 3560 // result, RBP is not included in the output of the instruction either.
 3561 
 3562 operand no_rax_RegP()
 3563 %{
        // Pointer operand allocated from class ptr_no_rax_reg.  Of the
        // single-register pointer operands it matches rbx, rsi and rdi;
        // rax_RegP is absent from the list.
 3564   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
 3565   match(RegP);
 3566   match(rbx_RegP);
 3567   match(rsi_RegP);
 3568   match(rdi_RegP);
 3569 
 3570   format %{ %}
 3571   interface(REG_INTER);
 3572 %}
 3573 
 3574 // This operand is not allowed to use RBP even if
 3575 // RBP is not used to hold the frame pointer.
 3576 operand no_rbp_RegP()
 3577 %{
        // Pointer operand allocated from class ptr_reg_no_rbp.  It matches
        // rbx, rsi and rdi; rbp_RegP is absent from the list.
 3578   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 3579   match(RegP);
 3580   match(rbx_RegP);
 3581   match(rsi_RegP);
 3582   match(rdi_RegP);
 3583 
 3584   format %{ %}
 3585   interface(REG_INTER);
 3586 %}
 3587 
 3588 operand no_rax_rbx_RegP()
 3589 %{
        // Pointer operand allocated from class ptr_no_rax_rbx_reg.  Of the
        // single-register pointer operands it matches only rsi and rdi.
 3590   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
 3591   match(RegP);
 3592   match(rsi_RegP);
 3593   match(rdi_RegP);
 3594 
 3595   format %{ %}
 3596   interface(REG_INTER);
 3597 %}
 3598 
 3599 // Special Registers
 3600 // Return a pointer value
 3601 operand rax_RegP()
 3602 %{
        // Pointer operand restricted to register class ptr_rax_reg.  Also
        // matches the general rRegP.
 3603   constraint(ALLOC_IN_RC(ptr_rax_reg));
 3604   match(RegP);
 3605   match(rRegP);
 3606 
 3607   format %{ %}
 3608   interface(REG_INTER);
 3609 %}
 3610 
 3611 // Special Registers
 3612 // Return a compressed pointer value
 3613 operand rax_RegN()
 3614 %{
        // Narrow pointer operand restricted to int_rax_reg, the same
        // register class rax_RegI uses.  Also matches the general rRegN.
 3615   constraint(ALLOC_IN_RC(int_rax_reg));
 3616   match(RegN);
 3617   match(rRegN);
 3618 
 3619   format %{ %}
 3620   interface(REG_INTER);
 3621 %}
 3622 
 3623 // Used in AtomicAdd
 3624 operand rbx_RegP()
 3625 %{
        // Pointer operand restricted to register class ptr_rbx_reg.  Also
        // matches the general rRegP.
 3626   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 3627   match(RegP);
 3628   match(rRegP);
 3629 
 3630   format %{ %}
 3631   interface(REG_INTER);
 3632 %}
 3633 
 3634 operand rsi_RegP()
 3635 %{
        // Pointer operand restricted to register class ptr_rsi_reg.  Also
        // matches the general rRegP.
 3636   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 3637   match(RegP);
 3638   match(rRegP);
 3639 
 3640   format %{ %}
 3641   interface(REG_INTER);
 3642 %}
 3643 
 3644 operand rbp_RegP()
 3645 %{
        // Pointer operand restricted to register class ptr_rbp_reg.  Also
        // matches the general rRegP.
 3646   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 3647   match(RegP);
 3648   match(rRegP);
 3649 
 3650   format %{ %}
 3651   interface(REG_INTER);
 3652 %}
 3653 
 3654 // Used in rep stosq
 3655 operand rdi_RegP()
 3656 %{
        // Pointer operand restricted to register class ptr_rdi_reg.  Also
        // matches the general rRegP.
 3657   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 3658   match(RegP);
 3659   match(rRegP);
 3660 
 3661   format %{ %}
 3662   interface(REG_INTER);
 3663 %}
 3664 
 3665 operand r15_RegP()
 3666 %{
        // Pointer operand restricted to register class ptr_r15_reg.  Also
        // matches the general rRegP.
 3667   constraint(ALLOC_IN_RC(ptr_r15_reg));
 3668   match(RegP);
 3669   match(rRegP);
 3670 
 3671   format %{ %}
 3672   interface(REG_INTER);
 3673 %}
 3674 
 3675 operand rRegL()
 3676 %{
        // General long register operand, allocated from class long_reg.
        // Of the single-register long operands its match list names
        // rax_RegL and rdx_RegL; rcx_RegL is not listed.
 3677   constraint(ALLOC_IN_RC(long_reg));
 3678   match(RegL);
 3679   match(rax_RegL);
 3680   match(rdx_RegL);
 3681 
 3682   format %{ %}
 3683   interface(REG_INTER);
 3684 %}
 3685 
 3686 // Special Registers: long operand allocated in the long_no_rax_rdx_reg register class
 3687 operand no_rax_rdx_RegL()
 3688 %{
 3689   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 3690   match(RegL);
 3691   match(rRegL); // a general rRegL operand is also accepted
 3692 
 3693   format %{ %}
 3694   interface(REG_INTER);
 3695 %}
 3696 
 3697 operand rax_RegL()
 3698 %{
 3699   constraint(ALLOC_IN_RC(long_rax_reg)); // long operand allocated in the long_rax_reg register class
 3700   match(RegL);
 3701   match(rRegL); // a general rRegL operand is also accepted
 3702 
 3703   format %{ "RAX" %}
 3704   interface(REG_INTER);
 3705 %}
 3706 
 3707 operand rcx_RegL()
 3708 %{
 3709   constraint(ALLOC_IN_RC(long_rcx_reg)); // long operand allocated in the long_rcx_reg register class
 3710   match(RegL);
 3711   match(rRegL); // a general rRegL operand is also accepted
 3712 
 3713   format %{ %}
 3714   interface(REG_INTER);
 3715 %}
 3716 
 3717 operand rdx_RegL()
 3718 %{
 3719   constraint(ALLOC_IN_RC(long_rdx_reg)); // long operand allocated in the long_rdx_reg register class
 3720   match(RegL);
 3721   match(rRegL); // a general rRegL operand is also accepted
 3722 
 3723   format %{ %}
 3724   interface(REG_INTER);
 3725 %}
 3726 
 3727 operand no_rbp_r13_RegL()
 3728 %{
 3729   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg)); // long operand allocated in the long_no_rbp_r13_reg register class
 3730   match(RegL);
 3731   match(rRegL); // the general and the rax/rcx/rdx long operands are also accepted
 3732   match(rax_RegL);
 3733   match(rcx_RegL);
 3734   match(rdx_RegL);
 3735 
 3736   format %{ %}
 3737   interface(REG_INTER);
 3738 %}
 3739 
 3740 // Flags register, used as output of compare instructions
 3741 operand rFlagsReg()
 3742 %{
 3743   constraint(ALLOC_IN_RC(int_flags)); // allocated in the int_flags register class
 3744   match(RegFlags);
 3745 
 3746   format %{ "RFLAGS" %}
 3747   interface(REG_INTER);
 3748 %}
 3749 
 3750 // Flags register, used as output of FLOATING POINT compare instructions
 3751 operand rFlagsRegU()
 3752 %{
 3753   constraint(ALLOC_IN_RC(int_flags)); // same register class and match rule as rFlagsReg; only the operand type differs
 3754   match(RegFlags);
 3755 
 3756   format %{ "RFLAGS_U" %}
 3757   interface(REG_INTER);
 3758 %}
 3759 
 3760 operand rFlagsRegUCF() %{
 3761   constraint(ALLOC_IN_RC(int_flags)); // flags operand in the int_flags register class, formatted as "RFLAGS_U_CF"
 3762   match(RegFlags);
 3763   predicate(false); // the predicate is constant false; NOTE(review): presumably this operand is only named explicitly by instructions - confirm
 3764 
 3765   format %{ "RFLAGS_U_CF" %}
 3766   interface(REG_INTER);
 3767 %}
 3768 
 3769 // Float register operands: RegF value allocated in the float_reg register class
 3770 operand regF() %{
 3771    constraint(ALLOC_IN_RC(float_reg));
 3772    match(RegF);
 3773 
 3774    format %{ %}
 3775    interface(REG_INTER);
 3776 %}
 3777 
 3778 // Float register operands: RegF value allocated in the float_reg_legacy register class
 3779 operand legRegF() %{
 3780    constraint(ALLOC_IN_RC(float_reg_legacy));
 3781    match(RegF);
 3782 
 3783    format %{ %}
 3784    interface(REG_INTER);
 3785 %}
 3786 
 3787 // Float register operands: RegF value allocated in the float_reg_vl register class
 3788 operand vlRegF() %{
 3789    constraint(ALLOC_IN_RC(float_reg_vl));
 3790    match(RegF);
 3791 
 3792    format %{ %}
 3793    interface(REG_INTER);
 3794 %}
 3795 
 3796 // Double register operands: RegD value allocated in the double_reg register class
 3797 operand regD() %{
 3798    constraint(ALLOC_IN_RC(double_reg));
 3799    match(RegD);
 3800 
 3801    format %{ %}
 3802    interface(REG_INTER);
 3803 %}
 3804 
 3805 // Double register operands: RegD value allocated in the double_reg_legacy register class
 3806 operand legRegD() %{
 3807    constraint(ALLOC_IN_RC(double_reg_legacy));
 3808    match(RegD);
 3809 
 3810    format %{ %}
 3811    interface(REG_INTER);
 3812 %}
 3813 
 3814 // Double register operands: RegD value allocated in the double_reg_vl register class
 3815 operand vlRegD() %{
 3816    constraint(ALLOC_IN_RC(double_reg_vl));
 3817    match(RegD);
 3818 
 3819    format %{ %}
 3820    interface(REG_INTER);
 3821 %}
 3822 
 3823 //----------Memory Operands----------------------------------------------------
 3824 // Direct Memory Operand
 3825 // operand direct(immP addr)
 3826 // %{
 3827 //   match(addr);
 3828 
 3829 //   format %{ "[$addr]" %}
 3830 //   interface(MEMORY_INTER) %{
 3831 //     base(0xFFFFFFFF);
 3832 //     index(0x4);
 3833 //     scale(0x0);
 3834 //     disp($addr);
 3835 //   %}
 3836 // %}
 3837 
 3838 // Indirect Memory Operand: the address is the pointer register alone
 3839 operand indirect(any_RegP reg)
 3840 %{
 3841   constraint(ALLOC_IN_RC(ptr_reg));
 3842   match(reg);
 3843 
 3844   format %{ "[$reg]" %}
 3845   interface(MEMORY_INTER) %{
 3846     base($reg);
 3847     index(0x4);  // No Index
 3848     scale(0x0);  // No Scale
 3849     disp(0x0);   // No Displacement
 3850   %}
 3851 %}
 3852 
 3853 // Indirect Memory Plus Short Offset Operand: base register plus an immL8 displacement
 3854 operand indOffset8(any_RegP reg, immL8 off)
 3855 %{
 3856   constraint(ALLOC_IN_RC(ptr_reg));
 3857   match(AddP reg off);
 3858 
 3859   format %{ "[$reg + $off (8-bit)]" %}
 3860   interface(MEMORY_INTER) %{
 3861     base($reg);
 3862     index(0x4);  // No Index
 3863     scale(0x0);  // No Scale
 3864     disp($off);
 3865   %}
 3866 %}
 3867 
 3868 // Indirect Memory Plus Long Offset Operand: base register plus an immL32 displacement
 3869 operand indOffset32(any_RegP reg, immL32 off)
 3870 %{
 3871   constraint(ALLOC_IN_RC(ptr_reg));
 3872   match(AddP reg off);
 3873 
 3874   format %{ "[$reg + $off (32-bit)]" %}
 3875   interface(MEMORY_INTER) %{
 3876     base($reg);
 3877     index(0x4);  // No Index
 3878     scale(0x0);  // No Scale
 3879     disp($off);
 3880   %}
 3881 %}
 3882 
 3883 // Indirect Memory Plus Index Register Plus Offset Operand: base + unscaled long index + immL32 displacement
 3884 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 3885 %{
 3886   constraint(ALLOC_IN_RC(ptr_reg));
 3887   match(AddP (AddP reg lreg) off);
 3888 
 3889   op_cost(10);
 3890   format %{"[$reg + $off + $lreg]" %}
 3891   interface(MEMORY_INTER) %{
 3892     base($reg);
 3893     index($lreg);
 3894     scale(0x0);  // No Scale
 3895     disp($off);
 3896   %}
 3897 %}
 3898 
 3899 // Indirect Memory Plus Index Register Operand: base + unscaled long index, no displacement
 3900 operand indIndex(any_RegP reg, rRegL lreg)
 3901 %{
 3902   constraint(ALLOC_IN_RC(ptr_reg));
 3903   match(AddP reg lreg);
 3904 
 3905   op_cost(10);
 3906   format %{"[$reg + $lreg]" %}
 3907   interface(MEMORY_INTER) %{
 3908     base($reg);
 3909     index($lreg);
 3910     scale(0x0);  // No Scale
 3911     disp(0x0);   // No Displacement
 3912   %}
 3913 %}
 3914 
 3915 // Indirect Memory Times Scale Plus Index Register: base + (long index << immI2 scale), no displacement
 3916 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 3917 %{
 3918   constraint(ALLOC_IN_RC(ptr_reg));
 3919   match(AddP reg (LShiftL lreg scale));
 3920 
 3921   op_cost(10);
 3922   format %{"[$reg + $lreg << $scale]" %}
 3923   interface(MEMORY_INTER) %{
 3924     base($reg);
 3925     index($lreg);
 3926     scale($scale);
 3927     disp(0x0);   // No Displacement
 3928   %}
 3929 %}
 3930 
 3931 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 3932 %{
 3933   constraint(ALLOC_IN_RC(ptr_reg));
 3934   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); // long type at in(3)->in(1) (presumably the ConvI2L below) must have lower bound >= 0
 3935   match(AddP reg (LShiftL (ConvI2L idx) scale)); // the int register idx is used directly as the index
 3936 
 3937   op_cost(10);
 3938   format %{"[$reg + pos $idx << $scale]" %}
 3939   interface(MEMORY_INTER) %{
 3940     base($reg);
 3941     index($idx);
 3942     scale($scale);
 3943     disp(0x0);   // No Displacement
 3944   %}
 3945 %}
 3946 
 3947 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: base + (long index << scale) + immL32 displacement
 3948 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 3949 %{
 3950   constraint(ALLOC_IN_RC(ptr_reg));
 3951   match(AddP (AddP reg (LShiftL lreg scale)) off);
 3952 
 3953   op_cost(10);
 3954   format %{"[$reg + $off + $lreg << $scale]" %}
 3955   interface(MEMORY_INTER) %{
 3956     base($reg);
 3957     index($lreg);
 3958     scale($scale);
 3959     disp($off);
 3960   %}
 3961 %}
 3962 
 3963 // Indirect Memory Plus Positive Index Register Plus Offset Operand: base + int index (unscaled) + immL32 displacement
 3964 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 3965 %{
 3966   constraint(ALLOC_IN_RC(ptr_reg));
 3967   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0); // long type at in(2)->in(3) (presumably the ConvI2L below) must have lower bound >= 0
 3968   match(AddP (AddP reg (ConvI2L idx)) off);
 3969 
 3970   op_cost(10);
 3971   format %{"[$reg + $off + $idx]" %}
 3972   interface(MEMORY_INTER) %{
 3973     base($reg);
 3974     index($idx);
 3975     scale(0x0);  // No Scale
 3976     disp($off);
 3977   %}
 3978 %}
 3979 
 3980 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: base + (int index << scale) + immL32 displacement
 3981 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3982 %{
 3983   constraint(ALLOC_IN_RC(ptr_reg));
 3984   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); // long type at in(2)->in(3)->in(1) (presumably the ConvI2L below) must have lower bound >= 0
 3985   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3986 
 3987   op_cost(10);
 3988   format %{"[$reg + $off + $idx << $scale]" %}
 3989   interface(MEMORY_INTER) %{
 3990     base($reg);
 3991     index($idx);
 3992     scale($scale);
 3993     disp($off);
 3994   %}
 3995 %}
 3996 
 3997 // Indirect Narrow Oop Plus Offset Operand
 3998 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3999 // we can't free r12 even with CompressedOops::base() == NULL.
 4000 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 4001   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8)); // only with compressed oops and a shift equal to Address::times_8
 4002   constraint(ALLOC_IN_RC(ptr_reg));
 4003   match(AddP (DecodeN reg) off); // the DecodeN is folded into the address: narrow reg becomes the scaled index
 4004 
 4005   op_cost(10);
 4006   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4007   interface(MEMORY_INTER) %{
 4008     base(0xc); // R12
 4009     index($reg);
 4010     scale(0x3);
 4011     disp($off);
 4012   %}
 4013 %}
 4014 
 4015 // Indirect Memory Operand (narrow): the narrow register is used directly as the base
 4016 operand indirectNarrow(rRegN reg)
 4017 %{
 4018   predicate(CompressedOops::shift() == 0); // only when the compressed-oop shift is zero
 4019   constraint(ALLOC_IN_RC(ptr_reg));
 4020   match(DecodeN reg);
 4021 
 4022   format %{ "[$reg]" %}
 4023   interface(MEMORY_INTER) %{
 4024     base($reg);
 4025     index(0x4);  // No Index
 4026     scale(0x0);  // No Scale
 4027     disp(0x0);   // No Displacement
 4028   %}
 4029 %}
 4030 
 4031 // Indirect Memory Plus Short Offset Operand (narrow): narrow base register plus an immL8 displacement
 4032 operand indOffset8Narrow(rRegN reg, immL8 off)
 4033 %{
 4034   predicate(CompressedOops::shift() == 0); // only when the compressed-oop shift is zero
 4035   constraint(ALLOC_IN_RC(ptr_reg));
 4036   match(AddP (DecodeN reg) off);
 4037 
 4038   format %{ "[$reg + $off (8-bit)]" %}
 4039   interface(MEMORY_INTER) %{
 4040     base($reg);
 4041     index(0x4);  // No Index
 4042     scale(0x0);  // No Scale
 4043     disp($off);
 4044   %}
 4045 %}
 4046 
 4047 // Indirect Memory Plus Long Offset Operand (narrow): narrow base register plus an immL32 displacement
 4048 operand indOffset32Narrow(rRegN reg, immL32 off)
 4049 %{
 4050   predicate(CompressedOops::shift() == 0); // only when the compressed-oop shift is zero
 4051   constraint(ALLOC_IN_RC(ptr_reg));
 4052   match(AddP (DecodeN reg) off);
 4053 
 4054   format %{ "[$reg + $off (32-bit)]" %}
 4055   interface(MEMORY_INTER) %{
 4056     base($reg);
 4057     index(0x4);  // No Index
 4058     scale(0x0);  // No Scale
 4059     disp($off);
 4060   %}
 4061 %}
 4062 
 4063 // Indirect Memory Plus Index Register Plus Offset Operand (narrow): narrow base + unscaled long index + immL32 displacement
 4064 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 4065 %{
 4066   predicate(CompressedOops::shift() == 0); // only when the compressed-oop shift is zero
 4067   constraint(ALLOC_IN_RC(ptr_reg));
 4068   match(AddP (AddP (DecodeN reg) lreg) off);
 4069 
 4070   op_cost(10);
 4071   format %{"[$reg + $off + $lreg]" %}
 4072   interface(MEMORY_INTER) %{
 4073     base($reg);
 4074     index($lreg);
 4075     scale(0x0);  // No Scale
 4076     disp($off);
 4077   %}
 4078 %}
 4079 
 4080 // Indirect Memory Plus Index Register Operand (narrow): narrow base + unscaled long index, no displacement
 4081 operand indIndexNarrow(rRegN reg, rRegL lreg)
 4082 %{
 4083   predicate(CompressedOops::shift() == 0); // only when the compressed-oop shift is zero
 4084   constraint(ALLOC_IN_RC(ptr_reg));
 4085   match(AddP (DecodeN reg) lreg);
 4086 
 4087   op_cost(10);
 4088   format %{"[$reg + $lreg]" %}
 4089   interface(MEMORY_INTER) %{
 4090     base($reg);
 4091     index($lreg);
 4092     scale(0x0);  // No Scale
 4093     disp(0x0);   // No Displacement
 4094   %}
 4095 %}
 4096 
 4097 // Indirect Memory Times Scale Plus Index Register (narrow): narrow base + (long index << immI2 scale), no displacement
 4098 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 4099 %{
 4100   predicate(CompressedOops::shift() == 0); // only when the compressed-oop shift is zero
 4101   constraint(ALLOC_IN_RC(ptr_reg));
 4102   match(AddP (DecodeN reg) (LShiftL lreg scale));
 4103 
 4104   op_cost(10);
 4105   format %{"[$reg + $lreg << $scale]" %}
 4106   interface(MEMORY_INTER) %{
 4107     base($reg);
 4108     index($lreg);
 4109     scale($scale);
 4110     disp(0x0);   // No Displacement
 4111   %}
 4112 %}
 4113 
 4114 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand (narrow): narrow base + (long index << scale) + immL32 displacement
 4115 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 4116 %{
 4117   predicate(CompressedOops::shift() == 0); // only when the compressed-oop shift is zero
 4118   constraint(ALLOC_IN_RC(ptr_reg));
 4119   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 4120 
 4121   op_cost(10);
 4122   format %{"[$reg + $off + $lreg << $scale]" %}
 4123   interface(MEMORY_INTER) %{
 4124     base($reg);
 4125     index($lreg);
 4126     scale($scale);
 4127     disp($off);
 4128   %}
 4129 %}
 4130 
 4131 // Indirect Memory Plus Positive Index Register Plus Offset Operand (narrow): narrow base + int index (unscaled) + immL32 displacement
 4132 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 4133 %{
 4134   constraint(ALLOC_IN_RC(ptr_reg));
 4135   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0); // zero oop shift, and the long type at in(2)->in(3) (presumably the ConvI2L) has lower bound >= 0
 4136   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 4137 
 4138   op_cost(10);
 4139   format %{"[$reg + $off + $idx]" %}
 4140   interface(MEMORY_INTER) %{
 4141     base($reg);
 4142     index($idx);
 4143     scale(0x0);  // No Scale
 4144     disp($off);
 4145   %}
 4146 %}
 4147 
 4148 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (narrow): narrow base + (int index << scale) + immL32 displacement
 4149 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 4150 %{
 4151   constraint(ALLOC_IN_RC(ptr_reg));
 4152   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); // zero oop shift, and the long type at in(2)->in(3)->in(1) (presumably the ConvI2L) has lower bound >= 0
 4153   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 4154 
 4155   op_cost(10);
 4156   format %{"[$reg + $off + $idx << $scale]" %}
 4157   interface(MEMORY_INTER) %{
 4158     base($reg);
 4159     index($idx);
 4160     scale($scale);
 4161     disp($off);
 4162   %}
 4163 %}
 4164 
 4165 //----------Special Memory Operands--------------------------------------------
 4166 // Stack Slot Operand - This operand is used for loading and storing temporary
 4167 //                      values on the stack where a match requires a value to
 4168 //                      flow through memory.
 4169 operand stackSlotP(sRegP reg)
 4170 %{
 4171   constraint(ALLOC_IN_RC(stack_slots)); // allocated in the stack_slots register class
 4172   // No match rule because this operand is only generated in matching
 4173 
 4174   format %{ "[$reg]" %}
 4175   interface(MEMORY_INTER) %{
 4176     base(0x4);   // RSP
 4177     index(0x4);  // No Index
 4178     scale(0x0);  // No Scale
 4179     disp($reg);  // Stack Offset
 4180   %}
 4181 %}
 4182 
 4183 operand stackSlotI(sRegI reg)
 4184 %{
 4185   constraint(ALLOC_IN_RC(stack_slots)); // allocated in the stack_slots register class
 4186   // No match rule because this operand is only generated in matching
 4187 
 4188   format %{ "[$reg]" %}
 4189   interface(MEMORY_INTER) %{
 4190     base(0x4);   // RSP
 4191     index(0x4);  // No Index
 4192     scale(0x0);  // No Scale
 4193     disp($reg);  // Stack Offset
 4194   %}
 4195 %}
 4196 
 4197 operand stackSlotF(sRegF reg)
 4198 %{
 4199   constraint(ALLOC_IN_RC(stack_slots)); // allocated in the stack_slots register class
 4200   // No match rule because this operand is only generated in matching
 4201 
 4202   format %{ "[$reg]" %}
 4203   interface(MEMORY_INTER) %{
 4204     base(0x4);   // RSP
 4205     index(0x4);  // No Index
 4206     scale(0x0);  // No Scale
 4207     disp($reg);  // Stack Offset
 4208   %}
 4209 %}
 4210 
 4211 operand stackSlotD(sRegD reg)
 4212 %{
 4213   constraint(ALLOC_IN_RC(stack_slots)); // allocated in the stack_slots register class
 4214   // No match rule because this operand is only generated in matching
 4215 
 4216   format %{ "[$reg]" %}
 4217   interface(MEMORY_INTER) %{
 4218     base(0x4);   // RSP
 4219     index(0x4);  // No Index
 4220     scale(0x0);  // No Scale
 4221     disp($reg);  // Stack Offset
 4222   %}
 4223 %}
 4224 operand stackSlotL(sRegL reg)
 4225 %{
 4226   constraint(ALLOC_IN_RC(stack_slots)); // allocated in the stack_slots register class
 4227   // No match rule because this operand is only generated in matching
 4228 
 4229   format %{ "[$reg]" %}
 4230   interface(MEMORY_INTER) %{
 4231     base(0x4);   // RSP
 4232     index(0x4);  // No Index
 4233     scale(0x0);  // No Scale
 4234     disp($reg);  // Stack Offset
 4235   %}
 4236 %}
 4237 
 4238 //----------Conditional Branch Operands----------------------------------------
 4239 // Comparison Op  - This is the operation of the comparison, and is limited to
 4240 //                  the following set of codes:
 4241 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4242 //
 4243 // Other attributes of the comparison, such as unsignedness, are specified
 4244 // by the comparison instruction that sets a condition code flags register.
 4245 // That result is represented by a flags operand whose subtype is appropriate
 4246 // to the unsignedness (etc.) of the comparison.
 4247 //
 4248 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4249 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4250 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4251 
 4252 // Comparison Code: maps each Bool test to a condition-code value and mnemonic (l/ge/le/g forms)
 4253 operand cmpOp()
 4254 %{
 4255   match(Bool);
 4256 
 4257   format %{ "" %}
 4258   interface(COND_INTER) %{
 4259     equal(0x4, "e");
 4260     not_equal(0x5, "ne");
 4261     less(0xC, "l");
 4262     greater_equal(0xD, "ge");
 4263     less_equal(0xE, "le");
 4264     greater(0xF, "g");
 4265     overflow(0x0, "o");
 4266     no_overflow(0x1, "no");
 4267   %}
 4268 %}
 4269 
 4270 // Comparison Code, unsigned compare.  Used by FP also, with
 4271 // C2 (unordered) turned into GT or LT already.  The other bits
 4272 // C0 and C3 are turned into Carry & Zero flags.
 4273 operand cmpOpU()
 4274 %{
 4275   match(Bool);
 4276 
 4277   format %{ "" %}
 4278   interface(COND_INTER) %{
 4279     equal(0x4, "e");
 4280     not_equal(0x5, "ne");
 4281     less(0x2, "b");  // ordering tests use the b/ae/be/a codes here instead of l/ge/le/g
 4282     greater_equal(0x3, "ae");
 4283     less_equal(0x6, "be");
 4284     greater(0x7, "a");
 4285     overflow(0x0, "o");
 4286     no_overflow(0x1, "no");
 4287   %}
 4288 %}
 4289 
 4290 
 4291 // Floating comparisons that don't require any fixup for the unordered case,
 4292 // If both inputs of the comparison are the same, ZF is always set so we
 4293 // don't need to use cmpOpUCF2 for eq/ne
 4294 operand cmpOpUCF() %{
 4295   match(Bool);
 4296   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4297             n->as_Bool()->_test._test == BoolTest::ge ||
 4298             n->as_Bool()->_test._test == BoolTest::le ||
 4299             n->as_Bool()->_test._test == BoolTest::gt ||
 4300             n->in(1)->in(1) == n->in(1)->in(2));
 4301   format %{ "" %}
 4302   interface(COND_INTER) %{
 4303     equal(0xb, "np");  // eq/ne are encoded as the np/p codes in this operand
 4304     not_equal(0xa, "p");
 4305     less(0x2, "b");
 4306     greater_equal(0x3, "ae");
 4307     less_equal(0x6, "be");
 4308     greater(0x7, "a");
 4309     overflow(0x0, "o");
 4310     no_overflow(0x1, "no");
 4311   %}
 4312 %}
 4313 
 4314 
 4315 // Floating comparisons that can be fixed up with extra conditional jumps:
 4316 operand cmpOpUCF2() %{
 4317   match(Bool);
 4318   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 4319              n->as_Bool()->_test._test == BoolTest::eq) &&
 4320             n->in(1)->in(1) != n->in(1)->in(2));
 4321   format %{ "" %}
 4322   interface(COND_INTER) %{
 4323     equal(0x4, "e");  // selected only for eq/ne tests whose two compare inputs are different nodes
 4324     not_equal(0x5, "ne");
 4325     less(0x2, "b");
 4326     greater_equal(0x3, "ae");
 4327     less_equal(0x6, "be");
 4328     greater(0x7, "a");
 4329     overflow(0x0, "o");
 4330     no_overflow(0x1, "no");
 4331   %}
 4332 %}
 4333 
 4334 //----------OPERAND CLASSES----------------------------------------------------
 4335 // Operand Classes are groups of operands that are used to simplify
 4336 // instruction definitions by not requiring the AD writer to specify separate
 4337 // instructions for every form of operand when the instruction accepts
 4338 // multiple operand types with the same basic encoding and format.  The classic
 4339 // case of this is memory operands.
 4340 
 4341 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4342                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4343                indCompressedOopOffset,
 4344                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 4345                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4346                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow); // groups the regular, compressed-oop and Narrow addressing operands under one "memory" class
 4347 
 4348 //----------PIPELINE-----------------------------------------------------------
 4349 // Rules which define the behavior of the target architectures pipeline.
 4350 pipeline %{
 4351 
 4352 //----------ATTRIBUTES---------------------------------------------------------
 4353 attributes %{
 4354   variable_size_instructions;        // Instructions are variable size
 4355   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4356   instruction_unit_size = 1;         // Instruction size is counted in units of 1 byte
 4357   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4358   instruction_fetch_units = 1;       // of 16 bytes
 4359 
 4360   // List of nop instructions
 4361   nops( MachNop );
 4362 %}
 4363 
 4364 //----------RESOURCES----------------------------------------------------------
 4365 // Resources are the functional units available to the machine
 4366 
 4367 // Generic P2/P3 pipeline
 4368 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4369 // 3 instructions decoded per cycle.
 4370 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4371 // 3 ALU op, only ALU0 handles mul instructions.
 4372 // NOTE(review): three memory units (MS0..MS2) are declared below although the text above says 2 load/store ops - confirm.
 4373 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4373            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 4374            BR, FPU,
 4375            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 4376 
 4377 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4378 // Pipeline Description specifies the stages in the machine's pipeline
 4379 
 4380 // Generic P2/P3 pipeline: six stages, S0 through S5
 4381 pipe_desc(S0, S1, S2, S3, S4, S5);
 4382 
 4383 //----------PIPELINE CLASSES---------------------------------------------------
 4384 // Pipeline Classes describe the stages in which input and output are
 4385 // referenced by the hardware pipeline.
 4386 
 4387 // Naming convention: ialu or fpu
 4388 // Then: _reg
 4389 // Then: _reg if there is a 2nd register
 4390 // Then: _long if it's a pair of instructions implementing a long
 4391 // Then: _fat if it requires the big decoder
 4392 //   Or: _mem if it requires the big decoder and a memory unit.
 4393 
 4394 // Integer ALU reg operation: one instruction that reads dst in S3 and writes it in S4
 4395 pipe_class ialu_reg(rRegI dst)
 4396 %{
 4397     single_instruction;
 4398     dst    : S4(write);
 4399     dst    : S3(read);
 4400     DECODE : S0;        // any decoder
 4401     ALU    : S3;        // any alu
 4402 %}
 4403 
 4404 // Long ALU reg operation: counted as two instructions; dst is read in S3 and written in S4
 4405 pipe_class ialu_reg_long(rRegL dst)
 4406 %{
 4407     instruction_count(2);
 4408     dst    : S4(write);
 4409     dst    : S3(read);
 4410     DECODE : S0(2);     // any 2 decoders
 4411     ALU    : S3(2);     // any 2 alus
 4412 %}
 4413 
 4414 // Integer ALU reg operation using big decoder: like ialu_reg but decoded on D0 only
 4415 pipe_class ialu_reg_fat(rRegI dst)
 4416 %{
 4417     single_instruction;
 4418     dst    : S4(write);
 4419     dst    : S3(read);
 4420     D0     : S0;        // big decoder only
 4421     ALU    : S3;        // any alu
 4422 %}
 4423 
 4424 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4
 4425 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 4426 %{
 4427     single_instruction;
 4428     dst    : S4(write);
 4429     src    : S3(read);
 4430     DECODE : S0;        // any decoder
 4431     ALU    : S3;        // any alu
 4432 %}
 4433 
 4434 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand, and no MEM unit is used)
 4435 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 4436 %{
 4437     single_instruction;
 4438     dst    : S4(write);
 4439     src    : S3(read);
 4440     D0     : S0;        // big decoder only
 4441     ALU    : S3;        // any alu
 4442 %}
 4443 
 4444 // Integer ALU reg-mem operation: mem is read in S3, the ALU runs in S4, dst is written in S5
 4445 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 4446 %{
 4447     single_instruction;
 4448     dst    : S5(write);
 4449     mem    : S3(read);
 4450     D0     : S0;        // big decoder only
 4451     ALU    : S4;        // any alu
 4452     MEM    : S3;        // any mem
 4453 %}
 4454 
 4455 // Integer mem operation (prefetch): reads mem in S3; no ALU resource and no register result
 4456 pipe_class ialu_mem(memory mem)
 4457 %{
 4458     single_instruction;
 4459     mem    : S3(read);
 4460     D0     : S0;        // big decoder only
 4461     MEM    : S3;        // any mem
 4462 %}
 4463 
 4464 // Integer Store to Memory: the mem operand is read in S3 and src in S5
 4465 pipe_class ialu_mem_reg(memory mem, rRegI src)
 4466 %{
 4467     single_instruction;
 4468     mem    : S3(read);
 4469     src    : S5(read);
 4470     D0     : S0;        // big decoder only
 4471     ALU    : S4;        // any alu
 4472     MEM    : S3;        // any mem
 4473 %}
 4474 
 4475 // // Long Store to Memory
 4476 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 4477 // %{
 4478 //     instruction_count(2);
 4479 //     mem    : S3(read);
 4480 //     src    : S5(read);
 4481 //     D0     : S0(2);          // big decoder only; twice
 4482 //     ALU    : S4(2);     // any 2 alus
 4483 //     MEM    : S3(2);  // Both mems
 4484 // %}
 4485 
 4486 // Integer Store to Memory (no register source operand): the mem operand is read in S3
 4487 pipe_class ialu_mem_imm(memory mem)
 4488 %{
 4489     single_instruction;
 4490     mem    : S3(read);
 4491     D0     : S0;        // big decoder only
 4492     ALU    : S4;        // any alu
 4493     MEM    : S3;        // any mem
 4494 %}
 4495 
 4496 // Integer ALU0 reg-reg operation: like ialu_reg_reg but restricted to decoder D0 and ALU0
 4497 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 4498 %{
 4499     single_instruction;
 4500     dst    : S4(write);
 4501     src    : S3(read);
 4502     D0     : S0;        // Big decoder only
 4503     ALU0   : S3;        // only alu0
 4504 %}
 4505 
 4506 // Integer ALU0 reg-mem operation: like ialu_reg_mem but restricted to ALU0
 4507 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 4508 %{
 4509     single_instruction;
 4510     dst    : S5(write);
 4511     mem    : S3(read);
 4512     D0     : S0;        // big decoder only
 4513     ALU0   : S4;        // ALU0 only
 4514     MEM    : S3;        // any mem
 4515 %}
 4516 
 4517 // Integer ALU reg-reg operation writing flags: src1/src2 are read in S3, cr is written in S4
 4518 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 4519 %{
 4520     single_instruction;
 4521     cr     : S4(write);
 4522     src1   : S3(read);
 4523     src2   : S3(read);
 4524     DECODE : S0;        // any decoder
 4525     ALU    : S3;        // any alu
 4526 %}
 4527 
 4528 // Integer ALU reg-imm operation writing flags: src1 is read in S3, cr is written in S4
 4529 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 4530 %{
 4531     single_instruction;
 4532     cr     : S4(write);
 4533     src1   : S3(read);
 4534     DECODE : S0;        // any decoder
 4535     ALU    : S3;        // any alu
 4536 %}
 4537 
 4538 // Integer ALU reg-mem operation writing flags: src1 and the memory operand src2 are read in S3, cr is written in S4
 4539 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 4540 %{
 4541     single_instruction;
 4542     cr     : S4(write);
 4543     src1   : S3(read);
 4544     src2   : S3(read);
 4545     D0     : S0;        // big decoder only
 4546     ALU    : S4;        // any alu
 4547     MEM    : S3;        // any mem
 4548 %}
 4549 
 4550 // pipe_cmplt: four-instruction sequence that reads p and q in S3 and y in S4 (no write is declared)
 4551 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 4552 %{
 4553     instruction_count(4);
 4554     y      : S4(read);
 4555     q      : S3(read);
 4556     p      : S3(read);
 4557     DECODE : S0(4);     // any decoder
 4558 %}
 4559 
 4560 // Conditional move reg-reg: src and the flags cr are read in S3, dst is written in S4
 4561 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 4562 %{
 4563     single_instruction;
 4564     dst    : S4(write);
 4565     src    : S3(read);
 4566     cr     : S3(read);
 4567     DECODE : S0;        // any decoder
 4568 %}
 4569 
 4570 // Conditional move reg-mem: the memory operand src and the flags cr are read in S3, dst is written in S4
 4571 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 4572 %{
 4573     single_instruction;
 4574     dst    : S4(write);
 4575     src    : S3(read);
 4576     cr     : S3(read);
 4577     DECODE : S0;        // any decoder
 4578     MEM    : S3;        // any mem
 4579 %}
 4580 
 4581 // Conditional move reg-reg long: declared single_instruction, yet uses a decoder count of 2
 4582 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 4583 %{
 4584     single_instruction;
 4585     dst    : S4(write);
 4586     src    : S3(read);
 4587     cr     : S3(read);
 4588     DECODE : S0(2);     // any 2 decoders
 4589 %}
 4590 
 4591 // XXX
 4592 // // Conditional move double reg-reg
 4593 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
 4594 // %{
 4595 //     single_instruction;
 4596 //     dst    : S4(write);
 4597 //     src    : S3(read);
 4598 //     cr     : S3(read);
 4599 //     DECODE : S0;     // any decoder
 4600 // %}
 4601 
 4602 // Float reg operation: counted as two instructions; dst is only read (in S3), no write is declared
 4603 pipe_class fpu_reg(regD dst)
 4604 %{
 4605     instruction_count(2);
 4606     dst    : S3(read);
 4607     DECODE : S0(2);     // any 2 decoders
 4608     FPU    : S3;
 4609 %}
 4610 
 4611 // Float reg-reg operation: counted as two instructions; src is read in S3, dst is written in S4
 4612 pipe_class fpu_reg_reg(regD dst, regD src)
 4613 %{
 4614     instruction_count(2);
 4615     dst    : S4(write);
 4616     src    : S3(read);
 4617     DECODE : S0(2);     // any 2 decoders
 4618     FPU    : S3;
 4619 %}
 4620 
 4621 // Float reg-reg-reg operation: counted as three instructions; src1/src2 are read in S3, dst is written in S4
 4622 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 4623 %{
 4624     instruction_count(3);
 4625     dst    : S4(write);
 4626     src1   : S3(read);
 4627     src2   : S3(read);
 4628     DECODE : S0(3);     // any 3 decoders
 4629     FPU    : S3(2);
 4630 %}
 4631 
 4632 // Float reg-reg-reg-reg operation: counted as four instructions; src1..src3 are read in S3, dst is written in S4
 4633 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 4634 %{
 4635     instruction_count(4);
 4636     dst    : S4(write);
 4637     src1   : S3(read);
 4638     src2   : S3(read);
 4639     src3   : S3(read);
 4640     DECODE : S0(4);     // any decoder, count 4
 4641     FPU    : S3(2);
 4642 %}
 4643 
 4644 // Float reg-mem-reg-reg operation: counted as four instructions; src1 is a memory operand, all sources are read in S3
 4645 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 4646 %{
 4647     instruction_count(4);
 4648     dst    : S4(write);
 4649     src1   : S3(read);
 4650     src2   : S3(read);
 4651     src3   : S3(read);
 4652     DECODE : S1(3);     // any 3 decoders
 4653     D0     : S0;        // Big decoder only
 4654     FPU    : S3(2);
 4655     MEM    : S3;        // any mem
 4656 %}
 4657 
 4658 // Float reg-mem operation: counted as two instructions; mem is read in S3, dst is written in S5
 4659 pipe_class fpu_reg_mem(regD dst, memory mem)
 4660 %{
 4661     instruction_count(2);
 4662     dst    : S5(write);
 4663     mem    : S3(read);
 4664     D0     : S0;        // big decoder only
 4665     DECODE : S1;        // any decoder for FPU POP
 4666     FPU    : S4;
 4667     MEM    : S3;        // any mem
 4668 %}
 4669 
 4670 // Float reg-reg-mem operation: counted as three instructions; src1 and mem are read in S3, dst is written in S5
 4671 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 4672 %{
 4673     instruction_count(3);
 4674     dst    : S5(write);
 4675     src1   : S3(read);
 4676     mem    : S3(read);
 4677     D0     : S0;        // big decoder only
 4678     DECODE : S1(2);     // any decoder for FPU POP
 4679     FPU    : S4;
 4680     MEM    : S3;        // any mem
 4681 %}
 4682 
 4683 // Float mem-reg operation: counted as two instructions; mem is read in S3 and src in S5
 4684 pipe_class fpu_mem_reg(memory mem, regD src)
 4685 %{
 4686     instruction_count(2);
 4687     src    : S5(read);
 4688     mem    : S3(read);
 4689     DECODE : S0;        // any decoder for FPU PUSH
 4690     D0     : S1;        // big decoder only
 4691     FPU    : S4;
 4692     MEM    : S3;        // any mem
 4693 %}
 4694 
 4695 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 4696 %{
 4697     instruction_count(3); // three-instruction class; src1, src2 and mem are all read in S3
 4698     src1   : S3(read);
 4699     src2   : S3(read);
 4700     mem    : S3(read);
 4701     DECODE : S0(2);     // any decoder for FPU PUSH
 4702     D0     : S1;        // big decoder only
 4703     FPU    : S4;
 4704     MEM    : S3;        // any mem
 4705 %}
 4706 
 4707 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 4708 %{
 4709     instruction_count(3); // three-instruction class; src1/src2 are read in S3 and mem in S4
 4710     src1   : S3(read);
 4711     src2   : S3(read);
 4712     mem    : S4(read);
 4713     DECODE : S0;        // any decoder for FPU PUSH
 4714     D0     : S0(2);     // big decoder only
 4715     FPU    : S4;
 4716     MEM    : S3(2);     // any mem
 4717 %}
 4718 
 4719 pipe_class fpu_mem_mem(memory dst, memory src1)
 4720 %{
          // Float mem-mem operation: two instructions, both operands in
          // memory. Unlike the other fpu_* classes visible here, this one
          // claims neither the FPU nor the DECODE resource; only D0 and MEM
          // are used, twice each.
 4721     instruction_count(2);
 4722     src1   : S3(read);
 4723     dst    : S4(read);
 4724     D0     : S0(2);     // big decoder only
 4725     MEM    : S3(2);     // any mem
 4726 %}
 4727 
 4728 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 4729 %{
          // Float mem-mem-mem operation: three instructions with all three
          // operands in memory. The sources are read in S3 and dst in S4;
          // D0 and MEM are each claimed three times.
 4730     instruction_count(3);
 4731     src1   : S3(read);
 4732     src2   : S3(read);
 4733     dst    : S4(read);
 4734     D0     : S0(3);     // big decoder only
 4735     FPU    : S4;
 4736     MEM    : S3(3);     // any mem
 4737 %}
 4738 
 4739 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 4740 %{
          // Float mem-reg operation that also involves a constant (per the
          // class name; the constant is not an operand of this class). Three
          // instructions; src1 and mem are read in S4, and D0/MEM are claimed
          // twice each - presumably once for the constant load; confirm.
 4741     instruction_count(3);
 4742     src1   : S4(read);
 4743     mem    : S4(read);
 4744     DECODE : S0;        // any decoder for FPU PUSH
 4745     D0     : S0(2);     // big decoder only
 4746     FPU    : S4;
 4747     MEM    : S3(2);     // any mem
 4748 %}
 4749 
 4750 // Float load constant
      // Pipeline class for a two-instruction sequence that loads a constant
      // into an FP register: dst is written in S5 and the load claims MEM
      // in S3.
 4751 pipe_class fpu_reg_con(regD dst)
 4752 %{
 4753     instruction_count(2);
 4754     dst    : S5(write);
 4755     D0     : S0;        // big decoder only for the load
 4756     DECODE : S1;        // any decoder for FPU POP
 4757     FPU    : S4;
 4758     MEM    : S3;        // any mem
 4759 %}
 4760 
 4761 // Float reg-reg operation with a loaded constant
      // Pipeline class for a three-instruction sequence combining a register
      // source (read in S3) with a constant load; dst is written in S5 and
      // DECODE is claimed twice.
 4762 pipe_class fpu_reg_reg_con(regD dst, regD src)
 4763 %{
 4764     instruction_count(3);
 4765     dst    : S5(write);
 4766     src    : S3(read);
 4767     D0     : S0;        // big decoder only for the load
 4768     DECODE : S1(2);     // any decoder for FPU POP
 4769     FPU    : S4;
 4770     MEM    : S3;        // any mem
 4771 %}
 4772 
 4773 // Unconditional branch
      // Single instruction that only claims the BR resource, in S3.
 4774 pipe_class pipe_jmp(label labl)
 4775 %{
 4776     single_instruction;
 4777     BR   : S3;
 4778 %}
 4779 
 4780 // Conditional branch
      // Single instruction that reads the flags register in S1 and claims the
      // BR resource in S3.
 4781 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 4782 %{
 4783     single_instruction;
 4784     cr    : S1(read);
 4785     BR    : S3;
 4786 %}
 4787 
 4788 // Allocation idiom
      // NOTE(review): the header says "Allocation idiom" while the class is
      // named pipe_cmpxchg - confirm which uses are intended.
      // Serializing single instruction with a fixed latency of 6: heap_ptr is
      // read in S3 and dst is written in S5.
 4789 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 4790 %{
 4791     instruction_count(1); force_serialization;
 4792     fixed_latency(6);
 4793     heap_ptr : S3(read);
 4794     DECODE   : S0(3);
 4795     D0       : S2;
 4796     MEM      : S3;
 4797     ALU      : S3(2);
 4798     dst      : S5(write);
 4799     BR       : S5;
 4800 %}
 4801 
 4802 // Generic big/slow expanded idiom
      // Catch-all class for expensive sequences: modelled as 10 instructions
      // spanning multiple bundles, serializing, with a fixed latency of 100.
      // It takes no operands.
 4803 pipe_class pipe_slow()
 4804 %{
 4805     instruction_count(10); multiple_bundles; force_serialization;
 4806     fixed_latency(100);
 4807     D0  : S0(2);
 4808     MEM : S3(2);
 4809 %}
 4810 
 4811 // The real do-nothing guy
      // Zero instructions, no operands and no resources; used below as the
      // pipeline class of MachNop.
 4812 pipe_class empty()
 4813 %{
 4814     instruction_count(0);
 4815 %}
 4816 
 4817 // Define the class for the Nop node
      // MachNop is bound to the zero-instruction "empty" pipeline class.
 4818 define
 4819 %{
 4820    MachNop = empty;
 4821 %}
 4822 
 4823 %}
 4824 
 4825 //----------INSTRUCTIONS-------------------------------------------------------
 4826 //
 4827 // match      -- States which machine-independent subtree may be replaced
 4828 //               by this instruction.
 4829 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4830 //               selection to identify a minimum cost tree of machine
 4831 //               instructions that matches a tree of machine-independent
 4832 //               instructions.
 4833 // format     -- A string providing the disassembly for this instruction.
 4834 //               The value of an instruction's operand may be inserted
 4835 //               by referring to it with a '$' prefix.
 4836 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4837 //               to within an encode class as $primary, $secondary, and $tertiary
 4838 //               respectively.  The primary opcode is commonly used to
 4839 //               indicate the type of machine instruction, while secondary
 4840 //               and tertiary are often used for prefix options or addressing
 4841 //               modes.
 4842 // ins_encode -- A list of encode classes with parameters. The encode class
 4843 //               name must have been defined in an 'enc_class' specification
 4844 //               in the encode section of the architecture description.
 4845 
 4846 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 4847 // Move Float: regF -> vlRegF
      // Placeholder register-class move. The encoding is ShouldNotReachHere(),
      // so this node must never survive to code emission.
 4848 instruct MoveF2VL(vlRegF dst, regF src) %{
 4849   match(Set dst src);
 4850   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4851   ins_encode %{
 4852     ShouldNotReachHere();
 4853   %}
 4854   ins_pipe( fpu_reg_reg );
 4855 %}
 4856 
 4857 // Move Float: regF -> legRegF
      // Placeholder register-class move. The encoding is ShouldNotReachHere(),
      // so this node must never survive to code emission.
 4858 instruct MoveF2LEG(legRegF dst, regF src) %{
 4859   match(Set dst src);
 4860   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4861   ins_encode %{
 4862     ShouldNotReachHere();
 4863   %}
 4864   ins_pipe( fpu_reg_reg );
 4865 %}
 4866 
 4867 // Move Float: vlRegF -> regF
      // Placeholder register-class move. The encoding is ShouldNotReachHere(),
      // so this node must never survive to code emission.
 4868 instruct MoveVL2F(regF dst, vlRegF src) %{
 4869   match(Set dst src);
 4870   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4871   ins_encode %{
 4872     ShouldNotReachHere();
 4873   %}
 4874   ins_pipe( fpu_reg_reg );
 4875 %}
 4876 
 4877 // Move Float: legRegF -> regF
      // Placeholder register-class move. The encoding is ShouldNotReachHere(),
      // so this node must never survive to code emission.
 4878 instruct MoveLEG2F(regF dst, legRegF src) %{
 4879   match(Set dst src);
 4880   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4881   ins_encode %{
 4882     ShouldNotReachHere();
 4883   %}
 4884   ins_pipe( fpu_reg_reg );
 4885 %}
 4886 
 4887 // Move Double: regD -> vlRegD
      // Placeholder register-class move. The encoding is ShouldNotReachHere(),
      // so this node must never survive to code emission.
 4888 instruct MoveD2VL(vlRegD dst, regD src) %{
 4889   match(Set dst src);
 4890   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4891   ins_encode %{
 4892     ShouldNotReachHere();
 4893   %}
 4894   ins_pipe( fpu_reg_reg );
 4895 %}
 4896 
 4897 // Move Double: regD -> legRegD
      // Placeholder register-class move. The encoding is ShouldNotReachHere(),
      // so this node must never survive to code emission.
 4898 instruct MoveD2LEG(legRegD dst, regD src) %{
 4899   match(Set dst src);
 4900   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4901   ins_encode %{
 4902     ShouldNotReachHere();
 4903   %}
 4904   ins_pipe( fpu_reg_reg );
 4905 %}
 4906 
 4907 // Move Double: vlRegD -> regD
      // Placeholder register-class move. The encoding is ShouldNotReachHere(),
      // so this node must never survive to code emission.
 4908 instruct MoveVL2D(regD dst, vlRegD src) %{
 4909   match(Set dst src);
 4910   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4911   ins_encode %{
 4912     ShouldNotReachHere();
 4913   %}
 4914   ins_pipe( fpu_reg_reg );
 4915 %}
 4916 
 4917 // Move Double: legRegD -> regD
      // Placeholder register-class move. The encoding is ShouldNotReachHere(),
      // so this node must never survive to code emission.
 4918 instruct MoveLEG2D(regD dst, legRegD src) %{
 4919   match(Set dst src);
 4920   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4921   ins_encode %{
 4922     ShouldNotReachHere();
 4923   %}
 4924   ins_pipe( fpu_reg_reg );
 4925 %}
 4926 
 4927 //----------Load/Store/Move Instructions---------------------------------------
 4928 //----------Load Instructions--------------------------------------------------
 4929 
 4930 // Load Byte (8 bit signed)
      // Matches a LoadB node and emits one movsbl from memory into an int
      // register.
 4931 instruct loadB(rRegI dst, memory mem)
 4932 %{
 4933   match(Set dst (LoadB mem));
 4934 
 4935   ins_cost(125);
 4936   format %{ "movsbl  $dst, $mem\t# byte" %}
 4937 
 4938   ins_encode %{
 4939     __ movsbl($dst$$Register, $mem$$Address);
 4940   %}
 4941 
 4942   ins_pipe(ialu_reg_mem);
 4943 %}
 4944 
 4945 // Load Byte (8 bit signed) into Long Register
      // Folds ConvI2L(LoadB mem) into a single movsbq, so no separate
      // int-to-long conversion is emitted.
 4946 instruct loadB2L(rRegL dst, memory mem)
 4947 %{
 4948   match(Set dst (ConvI2L (LoadB mem)));
 4949 
 4950   ins_cost(125);
 4951   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 4952 
 4953   ins_encode %{
 4954     __ movsbq($dst$$Register, $mem$$Address);
 4955   %}
 4956 
 4957   ins_pipe(ialu_reg_mem);
 4958 %}
 4959 
 4960 // Load Unsigned Byte (8 bit UNsigned)
      // Matches a LoadUB node and emits one movzbl from memory into an int
      // register.
 4961 instruct loadUB(rRegI dst, memory mem)
 4962 %{
 4963   match(Set dst (LoadUB mem));
 4964 
 4965   ins_cost(125);
 4966   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 4967 
 4968   ins_encode %{
 4969     __ movzbl($dst$$Register, $mem$$Address);
 4970   %}
 4971 
 4972   ins_pipe(ialu_reg_mem);
 4973 %}
 4974 
 4975 // Load Unsigned Byte (8 bit UNsigned) into Long Register
      // Folds ConvI2L(LoadUB mem) into a single movzbq.
 4976 instruct loadUB2L(rRegL dst, memory mem)
 4977 %{
 4978   match(Set dst (ConvI2L (LoadUB mem)));
 4979 
 4980   ins_cost(125);
 4981   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 4982 
 4983   ins_encode %{
 4984     __ movzbq($dst$$Register, $mem$$Address);
 4985   %}
 4986 
 4987   ins_pipe(ialu_reg_mem);
 4988 %}
 4989 
 4990 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
      // Folds ConvI2L(AndI(LoadUB mem, mask)) into a zero-extending byte load
      // followed by an AND. Only the low 8 bits of the mask are applied, and
      // the flags register is declared killed.
 4991 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 4992   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 4993   effect(KILL cr);
 4994 
 4995   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 4996             "andl    $dst, right_n_bits($mask, 8)" %}
 4997   ins_encode %{
 4998     // Load into the destination first, then mask it in place.
 4999     __ movzbq($dst$$Register, $mem$$Address);
 5000     __ andl($dst$$Register, $mask$$constant & right_n_bits(8));
 5001   %}
 5002   ins_pipe(ialu_reg_mem);
 5003 %}
 5004 
 5005 // Load Short (16 bit signed)
      // Matches a LoadS node and emits one movswl from memory into an int
      // register.
 5006 instruct loadS(rRegI dst, memory mem)
 5007 %{
 5008   match(Set dst (LoadS mem));
 5009 
 5010   ins_cost(125);
 5011   format %{ "movswl $dst, $mem\t# short" %}
 5012 
 5013   ins_encode %{
 5014     __ movswl($dst$$Register, $mem$$Address);
 5015   %}
 5016 
 5017   ins_pipe(ialu_reg_mem);
 5018 %}
 5019 
 5020 // Load Short (16 bit signed) to Byte (8 bit signed)
      // Matches the shift pair (LoadS << 24) >> 24 and replaces it with a
      // single movsbl from the same address.
 5021 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5022   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5023 
 5024   ins_cost(125);
 5025   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 5026   ins_encode %{
 5027     __ movsbl($dst$$Register, $mem$$Address);
 5028   %}
 5029   ins_pipe(ialu_reg_mem);
 5030 %}
 5031 
 5032 // Load Short (16 bit signed) into Long Register
      // Folds ConvI2L(LoadS mem) into a single movswq.
 5033 instruct loadS2L(rRegL dst, memory mem)
 5034 %{
 5035   match(Set dst (ConvI2L (LoadS mem)));
 5036 
 5037   ins_cost(125);
 5038   format %{ "movswq $dst, $mem\t# short -> long" %}
 5039 
 5040   ins_encode %{
 5041     __ movswq($dst$$Register, $mem$$Address);
 5042   %}
 5043 
 5044   ins_pipe(ialu_reg_mem);
 5045 %}
 5046 
 5047 // Load Unsigned Short/Char (16 bit UNsigned)
      // Matches a LoadUS node and emits one movzwl from memory into an int
      // register.
 5048 instruct loadUS(rRegI dst, memory mem)
 5049 %{
 5050   match(Set dst (LoadUS mem));
 5051 
 5052   ins_cost(125);
 5053   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 5054 
 5055   ins_encode %{
 5056     __ movzwl($dst$$Register, $mem$$Address);
 5057   %}
 5058 
 5059   ins_pipe(ialu_reg_mem);
 5060 %}
 5061 
 5062 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
      // Matches the shift pair (LoadUS << 24) >> 24 and replaces it with a
      // single movsbl from the same address.
 5063 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5064   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5065 
 5066   ins_cost(125);
 5067   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 5068   ins_encode %{
 5069     __ movsbl($dst$$Register, $mem$$Address);
 5070   %}
 5071   ins_pipe(ialu_reg_mem);
 5072 %}
 5073 
 5074 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
      // Folds ConvI2L(LoadUS mem) into a single movzwq.
 5075 instruct loadUS2L(rRegL dst, memory mem)
 5076 %{
 5077   match(Set dst (ConvI2L (LoadUS mem)));
 5078 
 5079   ins_cost(125);
 5080   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 5081 
 5082   ins_encode %{
 5083     __ movzwq($dst$$Register, $mem$$Address);
 5084   %}
 5085 
 5086   ins_pipe(ialu_reg_mem);
 5087 %}
 5088 
 5089 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
      // With a 0xFF mask only the low byte survives, so the load, AND and
      // conversion collapse into one movzbq. No ins_cost is given for this
      // rule.
 5090 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 5091   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5092 
 5093   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 5094   ins_encode %{
 5095     __ movzbq($dst$$Register, $mem$$Address);
 5096   %}
 5097   ins_pipe(ialu_reg_mem);
 5098 %}
 5099 
 5100 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
      // Folds ConvI2L(AndI(LoadUS mem, mask)) into a zero-extending 16-bit
      // load followed by an AND. Only the low 16 bits of the mask are applied,
      // and the flags register is declared killed.
 5101 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 5102   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5103   effect(KILL cr);
 5104 
 5105   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5106             "andl    $dst, right_n_bits($mask, 16)" %}
 5107   ins_encode %{
 5108     // Load into the destination first, then mask it in place.
 5109     __ movzwq($dst$$Register, $mem$$Address);
 5110     __ andl($dst$$Register, $mask$$constant & right_n_bits(16));
 5111   %}
 5112   ins_pipe(ialu_reg_mem);
 5113 %}
 5114 
 5115 // Load Integer
      // Matches a LoadI node and emits one movl from memory into an int
      // register.
 5116 instruct loadI(rRegI dst, memory mem)
 5117 %{
 5118   match(Set dst (LoadI mem));
 5119 
 5120   ins_cost(125);
 5121   format %{ "movl    $dst, $mem\t# int" %}
 5122 
 5123   ins_encode %{
 5124     __ movl($dst$$Register, $mem$$Address);
 5125   %}
 5126 
 5127   ins_pipe(ialu_reg_mem);
 5128 %}
 5129 
 5130 // Load Integer (32 bit signed) to Byte (8 bit signed)
      // Matches the shift pair (LoadI << 24) >> 24 and replaces it with a
      // single movsbl from the same address.
 5131 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5132   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5133 
 5134   ins_cost(125);
 5135   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 5136   ins_encode %{
 5137     __ movsbl($dst$$Register, $mem$$Address);
 5138   %}
 5139   ins_pipe(ialu_reg_mem);
 5140 %}
 5141 
 5142 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
      // Matches AndI(LoadI mem, 0xFF) and replaces it with a single movzbl
      // from the same address.
 5143 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5144   match(Set dst (AndI (LoadI mem) mask));
 5145 
 5146   ins_cost(125);
 5147   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 5148   ins_encode %{
 5149     __ movzbl($dst$$Register, $mem$$Address);
 5150   %}
 5151   ins_pipe(ialu_reg_mem);
 5152 %}
 5153 
 5154 // Load Integer (32 bit signed) to Short (16 bit signed)
      // Matches the shift pair (LoadI << 16) >> 16 and replaces it with a
      // single movswl from the same address.
 5155 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5156   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5157 
 5158   ins_cost(125);
 5159   format %{ "movswl  $dst, $mem\t# int -> short" %}
 5160   ins_encode %{
 5161     __ movswl($dst$$Register, $mem$$Address);
 5162   %}
 5163   ins_pipe(ialu_reg_mem);
 5164 %}
 5165 
 5166 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
      // Matches AndI(LoadI mem, 0xFFFF) and replaces it with a single movzwl
      // from the same address.
 5167 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5168   match(Set dst (AndI (LoadI mem) mask));
 5169 
 5170   ins_cost(125);
 5171   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 5172   ins_encode %{
 5173     __ movzwl($dst$$Register, $mem$$Address);
 5174   %}
 5175   ins_pipe(ialu_reg_mem);
 5176 %}
 5177 
 5178 // Load Integer into Long Register
      // Folds ConvI2L(LoadI mem) into a single movslq.
 5179 instruct loadI2L(rRegL dst, memory mem)
 5180 %{
 5181   match(Set dst (ConvI2L (LoadI mem)));
 5182 
 5183   ins_cost(125);
 5184   format %{ "movslq  $dst, $mem\t# int -> long" %}
 5185 
 5186   ins_encode %{
 5187     __ movslq($dst$$Register, $mem$$Address);
 5188   %}
 5189 
 5190   ins_pipe(ialu_reg_mem);
 5191 %}
 5192 
 5193 // Load Integer with mask 0xFF into Long Register
      // With a 0xFF mask only the low byte survives, so the load, AND and
      // conversion collapse into one movzbq. No ins_cost is given for this
      // rule.
 5194 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 5195   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5196 
 5197   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 5198   ins_encode %{
 5199     __ movzbq($dst$$Register, $mem$$Address);
 5200   %}
 5201   ins_pipe(ialu_reg_mem);
 5202 %}
 5203 
 5204 // Load Integer with mask 0xFFFF into Long Register
      // With a 0xFFFF mask only the low 16 bits survive, so the load, AND and
      // conversion collapse into one movzwq. No ins_cost is given for this
      // rule.
 5205 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 5206   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5207 
 5208   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 5209   ins_encode %{
 5210     __ movzwq($dst$$Register, $mem$$Address);
 5211   %}
 5212   ins_pipe(ialu_reg_mem);
 5213 %}
 5214 
 5215 // Load Integer with a 31-bit mask into Long Register
      // Folds ConvI2L(AndI(LoadI mem, mask)) for an immU31 mask into a 32-bit
      // load followed by a 32-bit AND. The flags register is declared killed.
 5216 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 5217   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5218   effect(KILL cr);
 5219 
 5220   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 5221             "andl    $dst, $mask" %}
 5222   ins_encode %{
 5223     // Load into the destination first, then mask it in place.
 5224     __ movl($dst$$Register, $mem$$Address);
 5225     __ andl($dst$$Register, $mask$$constant);
 5226   %}
 5227   ins_pipe(ialu_reg_mem);
 5228 %}
 5229 
 5230 // Load Unsigned Integer into Long Register
      // Matches AndL(ConvI2L(LoadI mem), mask) for an immL_32bits mask and
      // emits a plain movl; no separate conversion or AND is generated.
 5231 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 5232 %{
 5233   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5234 
 5235   ins_cost(125);
 5236   format %{ "movl    $dst, $mem\t# uint -> long" %}
 5237 
 5238   ins_encode %{
 5239     __ movl($dst$$Register, $mem$$Address);
 5240   %}
 5241 
 5242   ins_pipe(ialu_reg_mem);
 5243 %}
 5244 
 5245 // Load Long
      // Matches a LoadL node and emits one movq from memory into a long
      // register.
 5246 instruct loadL(rRegL dst, memory mem)
 5247 %{
 5248   match(Set dst (LoadL mem));
 5249 
 5250   ins_cost(125);
 5251   format %{ "movq    $dst, $mem\t# long" %}
 5252 
 5253   ins_encode %{
 5254     __ movq($dst$$Register, $mem$$Address);
 5255   %}
 5256 
 5257   ins_pipe(ialu_reg_mem); // XXX
 5258 %}
 5259 
 5260 // Load Range
      // Matches a LoadRange node and emits a 32-bit movl into an int register.
 5261 instruct loadRange(rRegI dst, memory mem)
 5262 %{
 5263   match(Set dst (LoadRange mem));
 5264 
 5265   ins_cost(125); // XXX
 5266   format %{ "movl    $dst, $mem\t# range" %}
 5267   ins_encode %{
 5268     __ movl($dst$$Register, $mem$$Address);
 5269   %}
 5270   ins_pipe(ialu_reg_mem);
 5271 %}
 5272 
 5273 // Load Pointer
      // Matches a LoadP node and emits one movq, but only when the load's
      // barrier_data() is zero.
      // NOTE(review): loads with non-zero barrier data are presumably matched
      // by rules defined elsewhere - confirm.
 5274 instruct loadP(rRegP dst, memory mem)
 5275 %{
 5276   match(Set dst (LoadP mem));
 5277   predicate(n->as_Load()->barrier_data() == 0);
 5278 
 5279   ins_cost(125); // XXX
 5280   format %{ "movq    $dst, $mem\t# ptr" %}
 5281   ins_encode %{
 5282     __ movq($dst$$Register, $mem$$Address);
 5283   %}
 5284   ins_pipe(ialu_reg_mem); // XXX
 5285 %}
 5286 
 5287 // Load Compressed Pointer
      // Matches a LoadN node and emits a 32-bit movl into a narrow-oop
      // register (rRegN).
 5288 instruct loadN(rRegN dst, memory mem)
 5289 %{
 5290    match(Set dst (LoadN mem));
 5291 
 5292    ins_cost(125); // XXX
 5293    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 5294    ins_encode %{
 5295      __ movl($dst$$Register, $mem$$Address);
 5296    %}
 5297    ins_pipe(ialu_reg_mem); // XXX
 5298 %}
 5299 
 5300 
 5301 // Load Klass Pointer
      // Matches a LoadKlass node and emits one movq into a pointer register.
 5302 instruct loadKlass(rRegP dst, memory mem)
 5303 %{
 5304   match(Set dst (LoadKlass mem));
 5305 
 5306   ins_cost(125); // XXX
 5307   format %{ "movq    $dst, $mem\t# class" %}
 5308   ins_encode %{
 5309     __ movq($dst$$Register, $mem$$Address);
 5310   %}
 5311   ins_pipe(ialu_reg_mem); // XXX
 5312 %}
 5313 
 5314 // Load narrow Klass Pointer
      // Matches a LoadNKlass node and emits a 32-bit movl into a narrow
      // register (rRegN).
 5315 instruct loadNKlass(rRegN dst, memory mem)
 5316 %{
 5317   match(Set dst (LoadNKlass mem));
 5318 
 5319   ins_cost(125); // XXX
 5320   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 5321   ins_encode %{
 5322     __ movl($dst$$Register, $mem$$Address);
 5323   %}
 5324   ins_pipe(ialu_reg_mem); // XXX
 5325 %}
 5326 
 5327 // Load Float
      // Matches a LoadF node and loads it into an XMM register through the
      // movflt helper. Costed at 145 and scheduled with pipe_slow.
 5328 instruct loadF(regF dst, memory mem)
 5329 %{
 5330   match(Set dst (LoadF mem));
 5331 
 5332   ins_cost(145); // XXX
 5333   format %{ "movss   $dst, $mem\t# float" %}
 5334   ins_encode %{
 5335     __ movflt($dst$$XMMRegister, $mem$$Address);
 5336   %}
 5337   ins_pipe(pipe_slow); // XXX
 5338 %}
 5339 
 5340 // Load Double
      // Variant selected when UseXmmLoadAndClearUpper is off; the format
      // string shows movlpd. The encoding calls the same movdbl helper as
      // loadD.
      // NOTE(review): movdbl presumably picks the actual instruction from the
      // same flag - confirm in the macro assembler.
 5341 instruct loadD_partial(regD dst, memory mem)
 5342 %{
 5343   predicate(!UseXmmLoadAndClearUpper);
 5344   match(Set dst (LoadD mem));
 5345 
 5346   ins_cost(145); // XXX
 5347   format %{ "movlpd  $dst, $mem\t# double" %}
 5348   ins_encode %{
 5349     __ movdbl($dst$$XMMRegister, $mem$$Address);
 5350   %}
 5351   ins_pipe(pipe_slow); // XXX
 5352 %}
 5353 
 5354 instruct loadD(regD dst, memory mem)
 5355 %{
        // Load Double, variant selected when UseXmmLoadAndClearUpper is on;
        // the format string shows movsd. The encoding calls the same movdbl
        // helper as loadD_partial, and the two predicates are complementary.
 5356   predicate(UseXmmLoadAndClearUpper);
 5357   match(Set dst (LoadD mem));
 5358 
 5359   ins_cost(145); // XXX
 5360   format %{ "movsd   $dst, $mem\t# double" %}
 5361   ins_encode %{
 5362     __ movdbl($dst$$XMMRegister, $mem$$Address);
 5363   %}
 5364   ins_pipe(pipe_slow); // XXX
 5365 %}
 5366 
 5367 
 5368 // Following pseudo code describes the algorithm for max[FD]:
 5369 // The min[FD] algorithm follows the same pattern, with the blends keyed on a
      // instead of b and vmin used in place of vmax:
 5370 //  btmp = (b < +0.0) ? a : b
 5371 //  atmp = (b < +0.0) ? b : a
 5372 //  Tmp  = Max_Float(atmp , btmp)
 5373 //  Res  = (atmp == NaN) ? atmp : Tmp
 5374 
 5375 // max = java.lang.Math.max(float a, float b)
      // AVX form used when the node is not a SuperWord reduction. Emits a
      // fixed five-instruction sequence at 128-bit vector length; tmp, atmp
      // and btmp are scratch registers (TEMP).
 5376 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 5377   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5378   match(Set dst (MaxF a b));
 5379   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5380   format %{
 5381      "vblendvps        $btmp,$b,$a,$b           \n\t"
 5382      "vblendvps        $atmp,$a,$b,$b           \n\t"
 5383      "vmaxss           $tmp,$atmp,$btmp         \n\t"
 5384      "vcmpps.unordered $btmp,$atmp,$atmp        \n\t"
 5385      "vblendvps        $dst,$tmp,$atmp,$btmp    \n\t"
 5386   %}
 5387   ins_encode %{
 5388     int vector_len = Assembler::AVX_128bit;
          // Pick btmp and atmp from a and b, using b as the blend mask.
 5389     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
 5390     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
 5391     __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
          // Compare atmp with itself; the format string labels this compare
          // "unordered", so Assembler::_false presumably encodes that
          // predicate - confirm against the Assembler enum.
 5392     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
          // Final blend between tmp and atmp, controlled by the compare result.
 5393     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5394  %}
 5395   ins_pipe( pipe_slow );
 5396 %}
 5397 
 5398 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
        // Float max used when the node is a SuperWord reduction. Delegates to
        // emit_fp_min_max (defined outside this excerpt) with min=false and
        // single=true; needs an XMM temp and an int temp, and kills the flags.
 5399   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5400   match(Set dst (MaxF a b));
 5401   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5402 
 5403   format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
 5404   ins_encode %{
 5405     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5406                     false /*min*/, true /*single*/);
 5407   %}
 5408   ins_pipe( pipe_slow );
 5409 %}
 5410 
 5411 // max = java.lang.Math.max(double a, double b)
      // AVX form used when the node is not a SuperWord reduction. Same
      // five-step shape as the float rule, using the pd/sd instruction forms
      // at 128-bit vector length; tmp, atmp and btmp are scratch registers.
 5412 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 5413   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5414   match(Set dst (MaxD a b));
 5415   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 5416   format %{
 5417      "vblendvpd        $btmp,$b,$a,$b            \n\t"
 5418      "vblendvpd        $atmp,$a,$b,$b            \n\t"
 5419      "vmaxsd           $tmp,$atmp,$btmp          \n\t"
 5420      "vcmppd.unordered $btmp,$atmp,$atmp         \n\t"
 5421      "vblendvpd        $dst,$tmp,$atmp,$btmp     \n\t"
 5422   %}
 5423   ins_encode %{
 5424     int vector_len = Assembler::AVX_128bit;
          // Pick btmp and atmp from a and b, using b as the blend mask.
 5425     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
 5426     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
 5427     __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
          // Self-compare of atmp, labelled "unordered" in the format string.
 5428     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
          // Final blend between tmp and atmp, controlled by the compare result.
 5429     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5430   %}
 5431   ins_pipe( pipe_slow );
 5432 %}
 5433 
 5434 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
        // Double max used when the node is a SuperWord reduction. Delegates to
        // emit_fp_min_max (defined outside this excerpt) with min=false and
        // single=false; the general-purpose temp is a long register here.
 5435   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5436   match(Set dst (MaxD a b));
 5437   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5438 
 5439   format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
 5440   ins_encode %{
 5441     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5442                     false /*min*/, false /*single*/);
 5443   %}
 5444   ins_pipe( pipe_slow );
 5445 %}
 5446 
 5447 // min = java.lang.Math.min(float a, float b)
      // AVX form used when the node is not a SuperWord reduction. Mirrors the
      // max sequence, but the two initial blends use a (not b) as the mask
      // and vminss replaces vmaxss.
 5448 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 5449   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5450   match(Set dst (MinF a b));
 5451   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5452   format %{
 5453      "vblendvps        $atmp,$a,$b,$a             \n\t"
 5454      "vblendvps        $btmp,$b,$a,$a             \n\t"
 5455      "vminss           $tmp,$atmp,$btmp           \n\t"
 5456      "vcmpps.unordered $btmp,$atmp,$atmp          \n\t"
 5457      "vblendvps        $dst,$tmp,$atmp,$btmp      \n\t"
 5458   %}
 5459   ins_encode %{
 5460     int vector_len = Assembler::AVX_128bit;
          // Pick atmp and btmp from a and b, using a as the blend mask.
 5461     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
 5462     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
 5463     __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
          // Self-compare of atmp, labelled "unordered" in the format string.
 5464     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
          // Final blend between tmp and atmp, controlled by the compare result.
 5465     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5466   %}
 5467   ins_pipe( pipe_slow );
 5468 %}
 5469 
 5470 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
        // Float min used when the node is a SuperWord reduction. Delegates to
        // emit_fp_min_max (defined outside this excerpt) with min=true and
        // single=true; needs an XMM temp and an int temp, and kills the flags.
 5471   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5472   match(Set dst (MinF a b));
 5473   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5474 
 5475   format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
 5476   ins_encode %{
 5477     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5478                     true /*min*/, true /*single*/);
 5479   %}
 5480   ins_pipe( pipe_slow );
 5481 %}
 5482 
 5483 // min = java.lang.Math.min(double a, double b)
      // AVX form used when the node is not a SuperWord reduction. Mirrors the
      // max sequence, but the two initial blends use a (not b) as the mask
      // and vminsd replaces vmaxsd.
 5484 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 5485   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5486   match(Set dst (MinD a b));
 5487   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5488   format %{
 5489      "vblendvpd        $atmp,$a,$b,$a           \n\t"
 5490      "vblendvpd        $btmp,$b,$a,$a           \n\t"
 5491      "vminsd           $tmp,$atmp,$btmp         \n\t"
 5492      "vcmppd.unordered $btmp,$atmp,$atmp        \n\t"
 5493      "vblendvpd        $dst,$tmp,$atmp,$btmp    \n\t"
 5494   %}
 5495   ins_encode %{
 5496     int vector_len = Assembler::AVX_128bit;
          // Pick atmp and btmp from a and b, using a as the blend mask.
 5497     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
 5498     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
 5499     __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
          // Self-compare of atmp, labelled "unordered" in the format string.
 5500     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
          // Final blend between tmp and atmp, controlled by the compare result.
 5501     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5502   %}
 5503   ins_pipe( pipe_slow );
 5504 %}
 5505 
 5506 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
        // Double min used when the node is a SuperWord reduction. Delegates to
        // emit_fp_min_max (defined outside this excerpt) with min=true and
        // single=false; the general-purpose temp is a long register here.
 5507   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5508   match(Set dst (MinD a b));
 5509   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5510 
 5511   format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
 5512   ins_encode %{
 5513     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5514                     true /*min*/, false /*single*/);
 5515   %}
 5516   ins_pipe( pipe_slow );
 5517 %}
 5518 
 5519 // Load Effective Address
      // Matches an indOffset8 address operand used as a value and materialises
      // it into a pointer register with one leaq.
 5520 instruct leaP8(rRegP dst, indOffset8 mem)
 5521 %{
 5522   match(Set dst mem);
 5523 
 5524   ins_cost(110); // XXX
 5525   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 5526   ins_encode %{
 5527     __ leaq($dst$$Register, $mem$$Address);
 5528   %}
 5529   ins_pipe(ialu_reg_reg_fat);
 5530 %}
 5531 
 5532 instruct leaP32(rRegP dst, indOffset32 mem)
 5533 %{
        // Load Effective Address for the indOffset32 operand form: the
        // address expression itself is the value, computed with one leaq.
 5534   match(Set dst mem);
 5535 
 5536   ins_cost(110);
 5537   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 5538   ins_encode %{
 5539     __ leaq($dst$$Register, $mem$$Address);
 5540   %}
 5541   ins_pipe(ialu_reg_reg_fat);
 5542 %}
 5543 
 5544 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 5545 %{
        // Load Effective Address for the indIndexOffset operand form: the
        // address expression itself is the value, computed with one leaq.
 5546   match(Set dst mem);
 5547 
 5548   ins_cost(110);
 5549   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 5550   ins_encode %{
 5551     __ leaq($dst$$Register, $mem$$Address);
 5552   %}
 5553   ins_pipe(ialu_reg_reg_fat);
 5554 %}
 5555 
 5556 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 5557 %{
        // Load Effective Address for the indIndexScale operand form: the
        // address expression itself is the value, computed with one leaq.
 5558   match(Set dst mem);
 5559 
 5560   ins_cost(110);
 5561   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 5562   ins_encode %{
 5563     __ leaq($dst$$Register, $mem$$Address);
 5564   %}
 5565   ins_pipe(ialu_reg_reg_fat);
 5566 %}
 5567 
 5568 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 5569 %{
        // Load Effective Address for the indPosIndexScale operand form: the
        // address expression itself is the value, computed with one leaq.
        // The format tag reads "posidxscale" so that debug output can tell
        // this rule apart from leaPIdxScale, which prints "idxscale".
 5570   match(Set dst mem);
 5571 
 5572   ins_cost(110);
 5573   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
 5574   ins_encode %{
 5575     __ leaq($dst$$Register, $mem$$Address);
 5576   %}
 5577   ins_pipe(ialu_reg_reg_fat);
 5578 %}
 5579 
 5580 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 5581 %{
        // Load Effective Address for the indIndexScaleOffset operand form:
        // the address expression itself is the value, computed with one leaq.
 5582   match(Set dst mem);
 5583 
 5584   ins_cost(110);
 5585   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 5586   ins_encode %{
 5587     __ leaq($dst$$Register, $mem$$Address);
 5588   %}
 5589   ins_pipe(ialu_reg_reg_fat);
 5590 %}
 5591 
 5592 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 5593 %{
        // Load Effective Address for the indPosIndexOffset operand form: the
        // address expression itself is the value, computed with one leaq.
 5594   match(Set dst mem);
 5595 
 5596   ins_cost(110);
 5597   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 5598   ins_encode %{
 5599     __ leaq($dst$$Register, $mem$$Address);
 5600   %}
 5601   ins_pipe(ialu_reg_reg_fat);
 5602 %}
 5603 
 5604 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 5605 %{
        // Load Effective Address for the indPosIndexScaleOffset operand form:
        // the address expression itself is the value, computed with one leaq.
 5606   match(Set dst mem);
 5607 
 5608   ins_cost(110);
 5609   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 5610   ins_encode %{
 5611     __ leaq($dst$$Register, $mem$$Address);
 5612   %}
 5613   ins_pipe(ialu_reg_reg_fat);
 5614 %}
 5615 
 5616 // Load Effective Address which uses Narrow (32-bits) oop
      // Only enabled with compressed oops and a non-zero CompressedOops shift;
      // materialises an indCompressedOopOffset address with one leaq.
 5617 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 5618 %{
 5619   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 5620   match(Set dst mem);
 5621 
 5622   ins_cost(110);
 5623   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 5624   ins_encode %{
 5625     __ leaq($dst$$Register, $mem$$Address);
 5626   %}
 5627   ins_pipe(ialu_reg_reg_fat);
 5628 %}
 5629 
 5630 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 5631 %{
        // Load Effective Address for the indOffset8Narrow operand form.
        // Only enabled when the CompressedOops shift is zero.
 5632   predicate(CompressedOops::shift() == 0);
 5633   match(Set dst mem);
 5634 
 5635   ins_cost(110); // XXX
 5636   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 5637   ins_encode %{
 5638     __ leaq($dst$$Register, $mem$$Address);
 5639   %}
 5640   ins_pipe(ialu_reg_reg_fat);
 5641 %}
 5642 
 5643 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 5644 %{
        // Load Effective Address for the indOffset32Narrow operand form.
        // Only enabled when the CompressedOops shift is zero.
 5645   predicate(CompressedOops::shift() == 0);
 5646   match(Set dst mem);
 5647 
 5648   ins_cost(110);
 5649   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 5650   ins_encode %{
 5651     __ leaq($dst$$Register, $mem$$Address);
 5652   %}
 5653   ins_pipe(ialu_reg_reg_fat);
 5654 %}
 5655 
 5656 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 5657 %{
        // Load Effective Address for the indIndexOffsetNarrow operand form.
        // Only enabled when the CompressedOops shift is zero.
 5658   predicate(CompressedOops::shift() == 0);
 5659   match(Set dst mem);
 5660 
 5661   ins_cost(110);
 5662   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 5663   ins_encode %{
 5664     __ leaq($dst$$Register, $mem$$Address);
 5665   %}
 5666   ins_pipe(ialu_reg_reg_fat);
 5667 %}
 5668 
 5669 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 5670 %{
        // Load Effective Address for the indIndexScaleNarrow operand form.
        // Only enabled when the CompressedOops shift is zero.
 5671   predicate(CompressedOops::shift() == 0);
 5672   match(Set dst mem);
 5673 
 5674   ins_cost(110);
 5675   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 5676   ins_encode %{
 5677     __ leaq($dst$$Register, $mem$$Address);
 5678   %}
 5679   ins_pipe(ialu_reg_reg_fat);
 5680 %}
 5681 
 5682 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 5683 %{
 5684   predicate(CompressedOops::shift() == 0);
 5685   match(Set dst mem);
 5686 
 5687   ins_cost(110);
 5688   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 5689   ins_encode %{
 5690     __ leaq($dst$$Register, $mem$$Address);
 5691   %}
 5692   ins_pipe(ialu_reg_reg_fat);
 5693 %}
 5694 
 5695 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 5696 %{
 5697   predicate(CompressedOops::shift() == 0);
 5698   match(Set dst mem);
 5699 
 5700   ins_cost(110);
 5701   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 5702   ins_encode %{
 5703     __ leaq($dst$$Register, $mem$$Address);
 5704   %}
 5705   ins_pipe(ialu_reg_reg_fat);
 5706 %}
 5707 
 5708 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 5709 %{
 5710   predicate(CompressedOops::shift() == 0);
 5711   match(Set dst mem);
 5712 
 5713   ins_cost(110);
 5714   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 5715   ins_encode %{
 5716     __ leaq($dst$$Register, $mem$$Address);
 5717   %}
 5718   ins_pipe(ialu_reg_reg_fat);
 5719 %}
 5720 
 5721 instruct loadConI(rRegI dst, immI src)
 5722 %{
 5723   match(Set dst src);
 5724 
 5725   format %{ "movl    $dst, $src\t# int" %}
 5726   ins_encode %{
 5727     __ movl($dst$$Register, $src$$constant);
 5728   %}
 5729   ins_pipe(ialu_reg_fat); // XXX
 5730 %}
 5731 
 5732 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 5733 %{
 5734   match(Set dst src);
 5735   effect(KILL cr);
 5736 
 5737   ins_cost(50);
 5738   format %{ "xorl    $dst, $dst\t# int" %}
 5739   ins_encode %{
 5740     __ xorl($dst$$Register, $dst$$Register);
 5741   %}
 5742   ins_pipe(ialu_reg);
 5743 %}
 5744 
 5745 instruct loadConL(rRegL dst, immL src)
 5746 %{
 5747   match(Set dst src);
 5748 
 5749   ins_cost(150);
 5750   format %{ "movq    $dst, $src\t# long" %}
 5751   ins_encode %{
 5752     __ mov64($dst$$Register, $src$$constant);
 5753   %}
 5754   ins_pipe(ialu_reg);
 5755 %}
 5756 
 5757 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 5758 %{
 5759   match(Set dst src);
 5760   effect(KILL cr);
 5761 
 5762   ins_cost(50);
 5763   format %{ "xorl    $dst, $dst\t# long" %}
 5764   ins_encode %{
 5765     __ xorl($dst$$Register, $dst$$Register);
 5766   %}
 5767   ins_pipe(ialu_reg); // XXX
 5768 %}
 5769 
 5770 instruct loadConUL32(rRegL dst, immUL32 src)
 5771 %{
 5772   match(Set dst src);
 5773 
 5774   ins_cost(60);
 5775   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 5776   ins_encode %{
 5777     __ movl($dst$$Register, $src$$constant);
 5778   %}
 5779   ins_pipe(ialu_reg);
 5780 %}
 5781 
 5782 instruct loadConL32(rRegL dst, immL32 src)
 5783 %{
 5784   match(Set dst src);
 5785 
 5786   ins_cost(70);
 5787   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 5788   ins_encode %{
 5789     __ movq($dst$$Register, $src$$constant);
 5790   %}
 5791   ins_pipe(ialu_reg);
 5792 %}
 5793 
 5794 instruct loadConP(rRegP dst, immP con) %{
 5795   match(Set dst con);
 5796 
 5797   format %{ "movq    $dst, $con\t# ptr" %}
 5798   ins_encode %{
 5799     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 5800   %}
 5801   ins_pipe(ialu_reg_fat); // XXX
 5802 %}
 5803 
 5804 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 5805 %{
 5806   match(Set dst src);
 5807   effect(KILL cr);
 5808 
 5809   ins_cost(50);
 5810   format %{ "xorl    $dst, $dst\t# ptr" %}
 5811   ins_encode %{
 5812     __ xorl($dst$$Register, $dst$$Register);
 5813   %}
 5814   ins_pipe(ialu_reg);
 5815 %}
 5816 
 5817 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 5818 %{
 5819   match(Set dst src);
 5820   effect(KILL cr);
 5821 
 5822   ins_cost(60);
 5823   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 5824   ins_encode %{
 5825     __ movl($dst$$Register, $src$$constant);
 5826   %}
 5827   ins_pipe(ialu_reg);
 5828 %}
 5829 
 5830 instruct loadConF(regF dst, immF con) %{
 5831   match(Set dst con);
 5832   ins_cost(125);
 5833   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 5834   ins_encode %{
 5835     __ movflt($dst$$XMMRegister, $constantaddress($con));
 5836   %}
 5837   ins_pipe(pipe_slow);
 5838 %}
 5839 
 5840 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 5841   match(Set dst src);
 5842   effect(KILL cr);
 5843   format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
 5844   ins_encode %{
 5845     __ xorq($dst$$Register, $dst$$Register);
 5846   %}
 5847   ins_pipe(ialu_reg);
 5848 %}
 5849 
 5850 instruct loadConN(rRegN dst, immN src) %{
 5851   match(Set dst src);
 5852 
 5853   ins_cost(125);
 5854   format %{ "movl    $dst, $src\t# compressed ptr" %}
 5855   ins_encode %{
 5856     address con = (address)$src$$constant;
 5857     if (con == NULL) {
 5858       ShouldNotReachHere();
 5859     } else {
 5860       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 5861     }
 5862   %}
 5863   ins_pipe(ialu_reg_fat); // XXX
 5864 %}
 5865 
 5866 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 5867   match(Set dst src);
 5868 
 5869   ins_cost(125);
 5870   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 5871   ins_encode %{
 5872     address con = (address)$src$$constant;
 5873     if (con == NULL) {
 5874       ShouldNotReachHere();
 5875     } else {
 5876       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 5877     }
 5878   %}
 5879   ins_pipe(ialu_reg_fat); // XXX
 5880 %}
 5881 
 5882 instruct loadConF0(regF dst, immF0 src)
 5883 %{
 5884   match(Set dst src);
 5885   ins_cost(100);
 5886 
 5887   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 5888   ins_encode %{
 5889     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 5890   %}
 5891   ins_pipe(pipe_slow);
 5892 %}
 5893 
 5894 // Use the same format since predicate() can not be used here.
 5895 instruct loadConD(regD dst, immD con) %{
 5896   match(Set dst con);
 5897   ins_cost(125);
 5898   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 5899   ins_encode %{
 5900     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 5901   %}
 5902   ins_pipe(pipe_slow);
 5903 %}
 5904 
 5905 instruct loadConD0(regD dst, immD0 src)
 5906 %{
 5907   match(Set dst src);
 5908   ins_cost(100);
 5909 
 5910   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 5911   ins_encode %{
 5912     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 5913   %}
 5914   ins_pipe(pipe_slow);
 5915 %}
 5916 
 5917 instruct loadSSI(rRegI dst, stackSlotI src)
 5918 %{
 5919   match(Set dst src);
 5920 
 5921   ins_cost(125);
 5922   format %{ "movl    $dst, $src\t# int stk" %}
 5923   opcode(0x8B);
 5924   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
 5925   ins_pipe(ialu_reg_mem);
 5926 %}
 5927 
 5928 instruct loadSSL(rRegL dst, stackSlotL src)
 5929 %{
 5930   match(Set dst src);
 5931 
 5932   ins_cost(125);
 5933   format %{ "movq    $dst, $src\t# long stk" %}
 5934   opcode(0x8B);
 5935   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
 5936   ins_pipe(ialu_reg_mem);
 5937 %}
 5938 
 5939 instruct loadSSP(rRegP dst, stackSlotP src)
 5940 %{
 5941   match(Set dst src);
 5942 
 5943   ins_cost(125);
 5944   format %{ "movq    $dst, $src\t# ptr stk" %}
 5945   opcode(0x8B);
 5946   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
 5947   ins_pipe(ialu_reg_mem);
 5948 %}
 5949 
 5950 instruct loadSSF(regF dst, stackSlotF src)
 5951 %{
 5952   match(Set dst src);
 5953 
 5954   ins_cost(125);
 5955   format %{ "movss   $dst, $src\t# float stk" %}
 5956   ins_encode %{
 5957     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 5958   %}
 5959   ins_pipe(pipe_slow); // XXX
 5960 %}
 5961 
 5962 // Use the same format since predicate() can not be used here.
 5963 instruct loadSSD(regD dst, stackSlotD src)
 5964 %{
 5965   match(Set dst src);
 5966 
 5967   ins_cost(125);
 5968   format %{ "movsd   $dst, $src\t# double stk" %}
 5969   ins_encode  %{
 5970     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 5971   %}
 5972   ins_pipe(pipe_slow); // XXX
 5973 %}
 5974 
 5975 // Prefetch instructions for allocation.
 5976 // Must be safe to execute with invalid address (cannot fault).
 5977 
 5978 instruct prefetchAlloc( memory mem ) %{
 5979   predicate(AllocatePrefetchInstr==3);
 5980   match(PrefetchAllocation mem);
 5981   ins_cost(125);
 5982 
 5983   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 5984   ins_encode %{
 5985     __ prefetchw($mem$$Address);
 5986   %}
 5987   ins_pipe(ialu_mem);
 5988 %}
 5989 
 5990 instruct prefetchAllocNTA( memory mem ) %{
 5991   predicate(AllocatePrefetchInstr==0);
 5992   match(PrefetchAllocation mem);
 5993   ins_cost(125);
 5994 
 5995   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 5996   ins_encode %{
 5997     __ prefetchnta($mem$$Address);
 5998   %}
 5999   ins_pipe(ialu_mem);
 6000 %}
 6001 
 6002 instruct prefetchAllocT0( memory mem ) %{
 6003   predicate(AllocatePrefetchInstr==1);
 6004   match(PrefetchAllocation mem);
 6005   ins_cost(125);
 6006 
 6007   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 6008   ins_encode %{
 6009     __ prefetcht0($mem$$Address);
 6010   %}
 6011   ins_pipe(ialu_mem);
 6012 %}
 6013 
 6014 instruct prefetchAllocT2( memory mem ) %{
 6015   predicate(AllocatePrefetchInstr==2);
 6016   match(PrefetchAllocation mem);
 6017   ins_cost(125);
 6018 
 6019   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 6020   ins_encode %{
 6021     __ prefetcht2($mem$$Address);
 6022   %}
 6023   ins_pipe(ialu_mem);
 6024 %}
 6025 
 6026 //----------Store Instructions-------------------------------------------------
 6027 
 6028 // Store Byte
 6029 instruct storeB(memory mem, rRegI src)
 6030 %{
 6031   match(Set mem (StoreB mem src));
 6032 
 6033   ins_cost(125); // XXX
 6034   format %{ "movb    $mem, $src\t# byte" %}
 6035   ins_encode %{
 6036     __ movb($mem$$Address, $src$$Register);
 6037   %}
 6038   ins_pipe(ialu_mem_reg);
 6039 %}
 6040 
 6041 // Store Char/Short
 6042 instruct storeC(memory mem, rRegI src)
 6043 %{
 6044   match(Set mem (StoreC mem src));
 6045 
 6046   ins_cost(125); // XXX
 6047   format %{ "movw    $mem, $src\t# char/short" %}
 6048   ins_encode %{
 6049     __ movw($mem$$Address, $src$$Register);
 6050   %}
 6051   ins_pipe(ialu_mem_reg);
 6052 %}
 6053 
 6054 // Store Integer
 6055 instruct storeI(memory mem, rRegI src)
 6056 %{
 6057   match(Set mem (StoreI mem src));
 6058 
 6059   ins_cost(125); // XXX
 6060   format %{ "movl    $mem, $src\t# int" %}
 6061   ins_encode %{
 6062     __ movl($mem$$Address, $src$$Register);
 6063   %}
 6064   ins_pipe(ialu_mem_reg);
 6065 %}
 6066 
 6067 // Store Long
 6068 instruct storeL(memory mem, rRegL src)
 6069 %{
 6070   match(Set mem (StoreL mem src));
 6071 
 6072   ins_cost(125); // XXX
 6073   format %{ "movq    $mem, $src\t# long" %}
 6074   ins_encode %{
 6075     __ movq($mem$$Address, $src$$Register);
 6076   %}
 6077   ins_pipe(ialu_mem_reg); // XXX
 6078 %}
 6079 
 6080 // Store Pointer
 6081 instruct storeP(memory mem, any_RegP src)
 6082 %{
 6083   predicate(n->as_Store()->barrier_data() == 0);
 6084   match(Set mem (StoreP mem src));
 6085 
 6086   ins_cost(125); // XXX
 6087   format %{ "movq    $mem, $src\t# ptr" %}
 6088   ins_encode %{
 6089     __ movq($mem$$Address, $src$$Register);
 6090   %}
 6091   ins_pipe(ialu_mem_reg);
 6092 %}
 6093 
 6094 instruct storeImmP0(memory mem, immP0 zero)
 6095 %{
 6096   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && n->as_Store()->barrier_data() == 0);
 6097   match(Set mem (StoreP mem zero));
 6098 
 6099   ins_cost(125); // XXX
 6100   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 6101   ins_encode %{
 6102     __ movq($mem$$Address, r12);
 6103   %}
 6104   ins_pipe(ialu_mem_reg);
 6105 %}
 6106 
 6107 // Store NULL Pointer, mark word, or other simple pointer constant.
 6108 instruct storeImmP(memory mem, immP31 src)
 6109 %{
 6110   predicate(n->as_Store()->barrier_data() == 0);
 6111   match(Set mem (StoreP mem src));
 6112 
 6113   ins_cost(150); // XXX
 6114   format %{ "movq    $mem, $src\t# ptr" %}
 6115   ins_encode %{
 6116     __ movq($mem$$Address, $src$$constant);
 6117   %}
 6118   ins_pipe(ialu_mem_imm);
 6119 %}
 6120 
 6121 // Store Compressed Pointer
 6122 instruct storeN(memory mem, rRegN src)
 6123 %{
 6124   match(Set mem (StoreN mem src));
 6125 
 6126   ins_cost(125); // XXX
 6127   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6128   ins_encode %{
 6129     __ movl($mem$$Address, $src$$Register);
 6130   %}
 6131   ins_pipe(ialu_mem_reg);
 6132 %}
 6133 
 6134 instruct storeNKlass(memory mem, rRegN src)
 6135 %{
 6136   match(Set mem (StoreNKlass mem src));
 6137 
 6138   ins_cost(125); // XXX
 6139   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6140   ins_encode %{
 6141     __ movl($mem$$Address, $src$$Register);
 6142   %}
 6143   ins_pipe(ialu_mem_reg);
 6144 %}
 6145 
 6146 instruct storeImmN0(memory mem, immN0 zero)
 6147 %{
 6148   predicate(CompressedOops::base() == NULL);
 6149   match(Set mem (StoreN mem zero));
 6150 
 6151   ins_cost(125); // XXX
 6152   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 6153   ins_encode %{
 6154     __ movl($mem$$Address, r12);
 6155   %}
 6156   ins_pipe(ialu_mem_reg);
 6157 %}
 6158 
 6159 instruct storeImmN(memory mem, immN src)
 6160 %{
 6161   match(Set mem (StoreN mem src));
 6162 
 6163   ins_cost(150); // XXX
 6164   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6165   ins_encode %{
 6166     address con = (address)$src$$constant;
 6167     if (con == NULL) {
 6168       __ movl($mem$$Address, 0);
 6169     } else {
 6170       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 6171     }
 6172   %}
 6173   ins_pipe(ialu_mem_imm);
 6174 %}
 6175 
 6176 instruct storeImmNKlass(memory mem, immNKlass src)
 6177 %{
 6178   match(Set mem (StoreNKlass mem src));
 6179 
 6180   ins_cost(150); // XXX
 6181   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6182   ins_encode %{
 6183     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 6184   %}
 6185   ins_pipe(ialu_mem_imm);
 6186 %}
 6187 
 6188 // Store Integer Immediate
 6189 instruct storeImmI0(memory mem, immI_0 zero)
 6190 %{
 6191   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6192   match(Set mem (StoreI mem zero));
 6193 
 6194   ins_cost(125); // XXX
 6195   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 6196   ins_encode %{
 6197     __ movl($mem$$Address, r12);
 6198   %}
 6199   ins_pipe(ialu_mem_reg);
 6200 %}
 6201 
 6202 instruct storeImmI(memory mem, immI src)
 6203 %{
 6204   match(Set mem (StoreI mem src));
 6205 
 6206   ins_cost(150);
 6207   format %{ "movl    $mem, $src\t# int" %}
 6208   ins_encode %{
 6209     __ movl($mem$$Address, $src$$constant);
 6210   %}
 6211   ins_pipe(ialu_mem_imm);
 6212 %}
 6213 
 6214 // Store Long Immediate
 6215 instruct storeImmL0(memory mem, immL0 zero)
 6216 %{
 6217   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6218   match(Set mem (StoreL mem zero));
 6219 
 6220   ins_cost(125); // XXX
 6221   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 6222   ins_encode %{
 6223     __ movq($mem$$Address, r12);
 6224   %}
 6225   ins_pipe(ialu_mem_reg);
 6226 %}
 6227 
 6228 instruct storeImmL(memory mem, immL32 src)
 6229 %{
 6230   match(Set mem (StoreL mem src));
 6231 
 6232   ins_cost(150);
 6233   format %{ "movq    $mem, $src\t# long" %}
 6234   ins_encode %{
 6235     __ movq($mem$$Address, $src$$constant);
 6236   %}
 6237   ins_pipe(ialu_mem_imm);
 6238 %}
 6239 
 6240 // Store Short/Char Immediate
 6241 instruct storeImmC0(memory mem, immI_0 zero)
 6242 %{
 6243   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6244   match(Set mem (StoreC mem zero));
 6245 
 6246   ins_cost(125); // XXX
 6247   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 6248   ins_encode %{
 6249     __ movw($mem$$Address, r12);
 6250   %}
 6251   ins_pipe(ialu_mem_reg);
 6252 %}
 6253 
 6254 instruct storeImmI16(memory mem, immI16 src)
 6255 %{
 6256   predicate(UseStoreImmI16);
 6257   match(Set mem (StoreC mem src));
 6258 
 6259   ins_cost(150);
 6260   format %{ "movw    $mem, $src\t# short/char" %}
 6261   ins_encode %{
 6262     __ movw($mem$$Address, $src$$constant);
 6263   %}
 6264   ins_pipe(ialu_mem_imm);
 6265 %}
 6266 
 6267 // Store Byte Immediate
 6268 instruct storeImmB0(memory mem, immI_0 zero)
 6269 %{
 6270   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6271   match(Set mem (StoreB mem zero));
 6272 
 6273   ins_cost(125); // XXX
 6274   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 6275   ins_encode %{
 6276     __ movb($mem$$Address, r12);
 6277   %}
 6278   ins_pipe(ialu_mem_reg);
 6279 %}
 6280 
 6281 instruct storeImmB(memory mem, immI8 src)
 6282 %{
 6283   match(Set mem (StoreB mem src));
 6284 
 6285   ins_cost(150); // XXX
 6286   format %{ "movb    $mem, $src\t# byte" %}
 6287   ins_encode %{
 6288     __ movb($mem$$Address, $src$$constant);
 6289   %}
 6290   ins_pipe(ialu_mem_imm);
 6291 %}
 6292 
 6293 // Store CMS card-mark Immediate
 6294 instruct storeImmCM0_reg(memory mem, immI_0 zero)
 6295 %{
 6296   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6297   match(Set mem (StoreCM mem zero));
 6298 
 6299   ins_cost(125); // XXX
 6300   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
 6301   ins_encode %{
 6302     __ movb($mem$$Address, r12);
 6303   %}
 6304   ins_pipe(ialu_mem_reg);
 6305 %}
 6306 
 6307 instruct storeImmCM0(memory mem, immI_0 src)
 6308 %{
 6309   match(Set mem (StoreCM mem src));
 6310 
 6311   ins_cost(150); // XXX
 6312   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
 6313   ins_encode %{
 6314     __ movb($mem$$Address, $src$$constant);
 6315   %}
 6316   ins_pipe(ialu_mem_imm);
 6317 %}
 6318 
 6319 // Store Float
 6320 instruct storeF(memory mem, regF src)
 6321 %{
 6322   match(Set mem (StoreF mem src));
 6323 
 6324   ins_cost(95); // XXX
 6325   format %{ "movss   $mem, $src\t# float" %}
 6326   ins_encode %{
 6327     __ movflt($mem$$Address, $src$$XMMRegister);
 6328   %}
 6329   ins_pipe(pipe_slow); // XXX
 6330 %}
 6331 
 6332 // Store immediate Float value (it is faster than store from XMM register)
 6333 instruct storeF0(memory mem, immF0 zero)
 6334 %{
 6335   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6336   match(Set mem (StoreF mem zero));
 6337 
 6338   ins_cost(25); // XXX
 6339   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 6340   ins_encode %{
 6341     __ movl($mem$$Address, r12);
 6342   %}
 6343   ins_pipe(ialu_mem_reg);
 6344 %}
 6345 
 6346 instruct storeF_imm(memory mem, immF src)
 6347 %{
 6348   match(Set mem (StoreF mem src));
 6349 
 6350   ins_cost(50);
 6351   format %{ "movl    $mem, $src\t# float" %}
 6352   ins_encode %{
 6353     __ movl($mem$$Address, jint_cast($src$$constant));
 6354   %}
 6355   ins_pipe(ialu_mem_imm);
 6356 %}
 6357 
 6358 // Store Double
 6359 instruct storeD(memory mem, regD src)
 6360 %{
 6361   match(Set mem (StoreD mem src));
 6362 
 6363   ins_cost(95); // XXX
 6364   format %{ "movsd   $mem, $src\t# double" %}
 6365   ins_encode %{
 6366     __ movdbl($mem$$Address, $src$$XMMRegister);
 6367   %}
 6368   ins_pipe(pipe_slow); // XXX
 6369 %}
 6370 
 6371 // Store immediate double 0.0 (it is faster than store from XMM register)
 6372 instruct storeD0_imm(memory mem, immD0 src)
 6373 %{
 6374   predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
 6375   match(Set mem (StoreD mem src));
 6376 
 6377   ins_cost(50);
 6378   format %{ "movq    $mem, $src\t# double 0." %}
 6379   ins_encode %{
 6380     __ movq($mem$$Address, $src$$constant);
 6381   %}
 6382   ins_pipe(ialu_mem_imm);
 6383 %}
 6384 
 6385 instruct storeD0(memory mem, immD0 zero)
 6386 %{
 6387   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6388   match(Set mem (StoreD mem zero));
 6389 
 6390   ins_cost(25); // XXX
 6391   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 6392   ins_encode %{
 6393     __ movq($mem$$Address, r12);
 6394   %}
 6395   ins_pipe(ialu_mem_reg);
 6396 %}
 6397 
 6398 instruct storeSSI(stackSlotI dst, rRegI src)
 6399 %{
 6400   match(Set dst src);
 6401 
 6402   ins_cost(100);
 6403   format %{ "movl    $dst, $src\t# int stk" %}
 6404   opcode(0x89);
 6405   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
 6406   ins_pipe( ialu_mem_reg );
 6407 %}
 6408 
 6409 instruct storeSSL(stackSlotL dst, rRegL src)
 6410 %{
 6411   match(Set dst src);
 6412 
 6413   ins_cost(100);
 6414   format %{ "movq    $dst, $src\t# long stk" %}
 6415   opcode(0x89);
 6416   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
 6417   ins_pipe(ialu_mem_reg);
 6418 %}
 6419 
 6420 instruct storeSSP(stackSlotP dst, rRegP src)
 6421 %{
 6422   match(Set dst src);
 6423 
 6424   ins_cost(100);
 6425   format %{ "movq    $dst, $src\t# ptr stk" %}
 6426   opcode(0x89);
 6427   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
 6428   ins_pipe(ialu_mem_reg);
 6429 %}
 6430 
 6431 instruct storeSSF(stackSlotF dst, regF src)
 6432 %{
 6433   match(Set dst src);
 6434 
 6435   ins_cost(95); // XXX
 6436   format %{ "movss   $dst, $src\t# float stk" %}
 6437   ins_encode %{
 6438     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 6439   %}
 6440   ins_pipe(pipe_slow); // XXX
 6441 %}
 6442 
 6443 instruct storeSSD(stackSlotD dst, regD src)
 6444 %{
 6445   match(Set dst src);
 6446 
 6447   ins_cost(95); // XXX
 6448   format %{ "movsd   $dst, $src\t# double stk" %}
 6449   ins_encode %{
 6450     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 6451   %}
 6452   ins_pipe(pipe_slow); // XXX
 6453 %}
 6454 
 6455 instruct cacheWB(indirect addr)
 6456 %{
 6457   predicate(VM_Version::supports_data_cache_line_flush());
 6458   match(CacheWB addr);
 6459 
 6460   ins_cost(100);
 6461   format %{"cache wb $addr" %}
 6462   ins_encode %{
 6463     assert($addr->index_position() < 0, "should be");
 6464     assert($addr$$disp == 0, "should be");
 6465     __ cache_wb(Address($addr$$base$$Register, 0));
 6466   %}
 6467   ins_pipe(pipe_slow); // XXX
 6468 %}
 6469 
 6470 instruct cacheWBPreSync()
 6471 %{
 6472   predicate(VM_Version::supports_data_cache_line_flush());
 6473   match(CacheWBPreSync);
 6474 
 6475   ins_cost(100);
 6476   format %{"cache wb presync" %}
 6477   ins_encode %{
 6478     __ cache_wbsync(true);
 6479   %}
 6480   ins_pipe(pipe_slow); // XXX
 6481 %}
 6482 
 6483 instruct cacheWBPostSync()
 6484 %{
 6485   predicate(VM_Version::supports_data_cache_line_flush());
 6486   match(CacheWBPostSync);
 6487 
 6488   ins_cost(100);
 6489   format %{"cache wb postsync" %}
 6490   ins_encode %{
 6491     __ cache_wbsync(false);
 6492   %}
 6493   ins_pipe(pipe_slow); // XXX
 6494 %}
 6495 
 6496 //----------BSWAP Instructions-------------------------------------------------
 6497 instruct bytes_reverse_int(rRegI dst) %{
 6498   match(Set dst (ReverseBytesI dst));
 6499 
 6500   format %{ "bswapl  $dst" %}
 6501   ins_encode %{
 6502     __ bswapl($dst$$Register);
 6503   %}
 6504   ins_pipe( ialu_reg );
 6505 %}
 6506 
 6507 instruct bytes_reverse_long(rRegL dst) %{
 6508   match(Set dst (ReverseBytesL dst));
 6509 
 6510   format %{ "bswapq  $dst" %}
 6511   ins_encode %{
 6512     __ bswapq($dst$$Register);
 6513   %}
 6514   ins_pipe( ialu_reg);
 6515 %}
 6516 
 6517 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 6518   match(Set dst (ReverseBytesUS dst));
 6519   effect(KILL cr);
 6520 
 6521   format %{ "bswapl  $dst\n\t"
 6522             "shrl    $dst,16\n\t" %}
 6523   ins_encode %{
 6524     __ bswapl($dst$$Register);
 6525     __ shrl($dst$$Register, 16);
 6526   %}
 6527   ins_pipe( ialu_reg );
 6528 %}
 6529 
 6530 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 6531   match(Set dst (ReverseBytesS dst));
 6532   effect(KILL cr);
 6533 
 6534   format %{ "bswapl  $dst\n\t"
 6535             "sar     $dst,16\n\t" %}
 6536   ins_encode %{
 6537     __ bswapl($dst$$Register);
 6538     __ sarl($dst$$Register, 16);
 6539   %}
 6540   ins_pipe( ialu_reg );
 6541 %}
 6542 
 6543 //---------- Zeros Count Instructions ------------------------------------------
 6544 
 6545 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 6546   predicate(UseCountLeadingZerosInstruction);
 6547   match(Set dst (CountLeadingZerosI src));
 6548   effect(KILL cr);
 6549 
 6550   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 6551   ins_encode %{
 6552     __ lzcntl($dst$$Register, $src$$Register);
 6553   %}
 6554   ins_pipe(ialu_reg);
 6555 %}
 6556 
 6557 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 6558   predicate(UseCountLeadingZerosInstruction);
 6559   match(Set dst (CountLeadingZerosI (LoadI src)));
 6560   effect(KILL cr);
 6561   ins_cost(175);
 6562   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 6563   ins_encode %{
 6564     __ lzcntl($dst$$Register, $src$$Address);
 6565   %}
 6566   ins_pipe(ialu_reg_mem);
 6567 %}
 6568 
 6569 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 6570   predicate(!UseCountLeadingZerosInstruction);
 6571   match(Set dst (CountLeadingZerosI src));
 6572   effect(KILL cr);
 6573 
 6574   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 6575             "jnz     skip\n\t"
 6576             "movl    $dst, -1\n"
 6577       "skip:\n\t"
 6578             "negl    $dst\n\t"
 6579             "addl    $dst, 31" %}
 6580   ins_encode %{
 6581     Register Rdst = $dst$$Register;
 6582     Register Rsrc = $src$$Register;
 6583     Label skip;
 6584     __ bsrl(Rdst, Rsrc);
 6585     __ jccb(Assembler::notZero, skip);
 6586     __ movl(Rdst, -1);
 6587     __ bind(skip);
 6588     __ negl(Rdst);
 6589     __ addl(Rdst, BitsPerInt - 1);
 6590   %}
 6591   ins_pipe(ialu_reg);
 6592 %}
 6593 
 6594 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 6595   predicate(UseCountLeadingZerosInstruction);
 6596   match(Set dst (CountLeadingZerosL src));
 6597   effect(KILL cr);
 6598 
 6599   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 6600   ins_encode %{
 6601     __ lzcntq($dst$$Register, $src$$Register);
 6602   %}
 6603   ins_pipe(ialu_reg);
 6604 %}
 6605 
 6606 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 6607   predicate(UseCountLeadingZerosInstruction);
 6608   match(Set dst (CountLeadingZerosL (LoadL src)));
 6609   effect(KILL cr);
 6610   ins_cost(175);
 6611   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 6612   ins_encode %{
 6613     __ lzcntq($dst$$Register, $src$$Address);
 6614   %}
 6615   ins_pipe(ialu_reg_mem);
 6616 %}
 6617 
 6618 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 6619   predicate(!UseCountLeadingZerosInstruction);
 6620   match(Set dst (CountLeadingZerosL src));
 6621   effect(KILL cr);
 6622 
 6623   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 6624             "jnz     skip\n\t"
 6625             "movl    $dst, -1\n"
 6626       "skip:\n\t"
 6627             "negl    $dst\n\t"
 6628             "addl    $dst, 63" %}
 6629   ins_encode %{
 6630     Register Rdst = $dst$$Register;
 6631     Register Rsrc = $src$$Register;
 6632     Label skip;
 6633     __ bsrq(Rdst, Rsrc);
 6634     __ jccb(Assembler::notZero, skip);
 6635     __ movl(Rdst, -1);
 6636     __ bind(skip);
 6637     __ negl(Rdst);
 6638     __ addl(Rdst, BitsPerLong - 1);
 6639   %}
 6640   ins_pipe(ialu_reg);
 6641 %}
 6642 
 6643 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 6644   predicate(UseCountTrailingZerosInstruction);
 6645   match(Set dst (CountTrailingZerosI src));
 6646   effect(KILL cr);
 6647 
 6648   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 6649   ins_encode %{
 6650     __ tzcntl($dst$$Register, $src$$Register);
 6651   %}
 6652   ins_pipe(ialu_reg);
 6653 %}
 6654 
 6655 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 6656   predicate(UseCountTrailingZerosInstruction);
 6657   match(Set dst (CountTrailingZerosI (LoadI src)));
 6658   effect(KILL cr);
 6659   ins_cost(175);
 6660   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 6661   ins_encode %{
 6662     __ tzcntl($dst$$Register, $src$$Address);
 6663   %}
 6664   ins_pipe(ialu_reg_mem);
 6665 %}
 6666 
 6667 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 6668   predicate(!UseCountTrailingZerosInstruction);
 6669   match(Set dst (CountTrailingZerosI src));
 6670   effect(KILL cr);
 6671 
 6672   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 6673             "jnz     done\n\t"
 6674             "movl    $dst, 32\n"
 6675       "done:" %}
 6676   ins_encode %{
 6677     Register Rdst = $dst$$Register;
 6678     Label done;
 6679     __ bsfl(Rdst, $src$$Register);
 6680     __ jccb(Assembler::notZero, done);
 6681     __ movl(Rdst, BitsPerInt);
 6682     __ bind(done);
 6683   %}
 6684   ins_pipe(ialu_reg);
 6685 %}
 6686 
 6687 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 6688   predicate(UseCountTrailingZerosInstruction);
 6689   match(Set dst (CountTrailingZerosL src));
 6690   effect(KILL cr);
 6691 
 6692   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 6693   ins_encode %{
 6694     __ tzcntq($dst$$Register, $src$$Register);
 6695   %}
 6696   ins_pipe(ialu_reg);
 6697 %}
 6698 
 6699 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 6700   predicate(UseCountTrailingZerosInstruction);
 6701   match(Set dst (CountTrailingZerosL (LoadL src)));
 6702   effect(KILL cr);
 6703   ins_cost(175);
 6704   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 6705   ins_encode %{
 6706     __ tzcntq($dst$$Register, $src$$Address);
 6707   %}
 6708   ins_pipe(ialu_reg_mem);
 6709 %}
 6710 
 6711 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 6712   predicate(!UseCountTrailingZerosInstruction);
 6713   match(Set dst (CountTrailingZerosL src));
 6714   effect(KILL cr);
 6715 
 6716   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 6717             "jnz     done\n\t"
 6718             "movl    $dst, 64\n"
 6719       "done:" %}
 6720   ins_encode %{
 6721     Register Rdst = $dst$$Register;
 6722     Label done;
 6723     __ bsfq(Rdst, $src$$Register);
 6724     __ jccb(Assembler::notZero, done);
 6725     __ movl(Rdst, BitsPerLong);
 6726     __ bind(done);
 6727   %}
 6728   ins_pipe(ialu_reg);
 6729 %}
 6730 
 6731 //--------------- Reverse Operation Instructions ----------------
 6732 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 6733   predicate(!VM_Version::supports_gfni());
 6734   match(Set dst (ReverseI src));
 6735   effect(TEMP dst, TEMP rtmp, KILL cr);
 6736   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 6737   ins_encode %{
 6738     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 6739   %}
 6740   ins_pipe( ialu_reg );
 6741 %}
 6742 
 6743 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 6744   predicate(VM_Version::supports_gfni());
 6745   match(Set dst (ReverseI src));
 6746   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6747   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6748   ins_encode %{
 6749     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 6750   %}
 6751   ins_pipe( ialu_reg );
 6752 %}
 6753 
 6754 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 6755   predicate(!VM_Version::supports_gfni());
 6756   match(Set dst (ReverseL src));
 6757   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 6758   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 6759   ins_encode %{
 6760     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 6761   %}
 6762   ins_pipe( ialu_reg );
 6763 %}
 6764 
 6765 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 6766   predicate(VM_Version::supports_gfni());
 6767   match(Set dst (ReverseL src));
 6768   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6769   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6770   ins_encode %{
 6771     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 6772   %}
 6773   ins_pipe( ialu_reg );
 6774 %}
 6775 
 6776 //---------- Population Count Instructions -------------------------------------
 6777 
 6778 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 6779   predicate(UsePopCountInstruction);
 6780   match(Set dst (PopCountI src));
 6781   effect(KILL cr);
 6782 
 6783   format %{ "popcnt  $dst, $src" %}
 6784   ins_encode %{
 6785     __ popcntl($dst$$Register, $src$$Register);
 6786   %}
 6787   ins_pipe(ialu_reg);
 6788 %}
 6789 
 6790 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 6791   predicate(UsePopCountInstruction);
 6792   match(Set dst (PopCountI (LoadI mem)));
 6793   effect(KILL cr);
 6794 
 6795   format %{ "popcnt  $dst, $mem" %}
 6796   ins_encode %{
 6797     __ popcntl($dst$$Register, $mem$$Address);
 6798   %}
 6799   ins_pipe(ialu_reg);
 6800 %}
 6801 
 6802 // Note: Long.bitCount(long) returns an int.
 6803 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 6804   predicate(UsePopCountInstruction);
 6805   match(Set dst (PopCountL src));
 6806   effect(KILL cr);
 6807 
 6808   format %{ "popcnt  $dst, $src" %}
 6809   ins_encode %{
 6810     __ popcntq($dst$$Register, $src$$Register);
 6811   %}
 6812   ins_pipe(ialu_reg);
 6813 %}
 6814 
 6815 // Note: Long.bitCount(long) returns an int.
 6816 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 6817   predicate(UsePopCountInstruction);
 6818   match(Set dst (PopCountL (LoadL mem)));
 6819   effect(KILL cr);
 6820 
 6821   format %{ "popcnt  $dst, $mem" %}
 6822   ins_encode %{
 6823     __ popcntq($dst$$Register, $mem$$Address);
 6824   %}
 6825   ins_pipe(ialu_reg);
 6826 %}
 6827 
 6828 
 6829 //----------MemBar Instructions-----------------------------------------------
 6830 // Memory barrier flavors
 6831 
// Matches both MemBarAcquire and LoadFence. Emits no code: the encoding is
// empty, the declared size is 0 and the cost is 0.
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6843 
// Matches MemBarAcquireLock. Emits no code (empty encoding, size 0, cost 0);
// per the format text, the CMPXCHG in the preceding FastLock makes a separate
// barrier unnecessary.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6854 
// Matches both MemBarRelease and StoreFence. Emits no code: the encoding is
// empty, the declared size is 0 and the cost is 0.
instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6866 
// Matches MemBarReleaseLock. Emits no code (empty encoding, size 0, cost 0);
// per the format text, the FastUnlock that follows makes a separate barrier
// unnecessary.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6877 
// Matches MemBarVolatile and emits a StoreLoad barrier through the macro
// assembler's membar(). The format text prints it as a locked add of 0 to
// the top of the stack. The flags register is declared killed, and the cost
// of 400 is well above the zero-cost unnecessary_membar_volatile rule that
// matches the same node under a predicate.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
 6892 
// Zero-cost, zero-size alternative for MemBarVolatile, eligible only when
// Matcher::post_store_load_barrier(n) returns true for the node. Emits no
// code.
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6904 
// Matches both MemBarStoreStore and StoreStoreFence. Emits no code: the
// encoding is empty, the declared size is 0 and the cost is 0.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 6915 
 6916 //----------Move Instructions--------------------------------------------------
 6917 
 6918 instruct castX2P(rRegP dst, rRegL src)
 6919 %{
 6920   match(Set dst (CastX2P src));
 6921 
 6922   format %{ "movq    $dst, $src\t# long->ptr" %}
 6923   ins_encode %{
 6924     if ($dst$$reg != $src$$reg) {
 6925       __ movptr($dst$$Register, $src$$Register);
 6926     }
 6927   %}
 6928   ins_pipe(ialu_reg_reg); // XXX
 6929 %}
 6930 
 6931 instruct castP2X(rRegL dst, rRegP src)
 6932 %{
 6933   match(Set dst (CastP2X src));
 6934 
 6935   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6936   ins_encode %{
 6937     if ($dst$$reg != $src$$reg) {
 6938       __ movptr($dst$$Register, $src$$Register);
 6939     }
 6940   %}
 6941   ins_pipe(ialu_reg_reg); // XXX
 6942 %}
 6943 
 6944 // Convert oop into int for vectors alignment masking
 6945 instruct convP2I(rRegI dst, rRegP src)
 6946 %{
 6947   match(Set dst (ConvL2I (CastP2X src)));
 6948 
 6949   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6950   ins_encode %{
 6951     __ movl($dst$$Register, $src$$Register);
 6952   %}
 6953   ins_pipe(ialu_reg_reg); // XXX
 6954 %}
 6955 
 6956 // Convert compressed oop into int for vectors alignment masking
 6957 // in case of 32bit oops (heap < 4Gb).
 6958 instruct convN2I(rRegI dst, rRegN src)
 6959 %{
 6960   predicate(CompressedOops::shift() == 0);
 6961   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6962 
 6963   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 6964   ins_encode %{
 6965     __ movl($dst$$Register, $src$$Register);
 6966   %}
 6967   ins_pipe(ialu_reg_reg); // XXX
 6968 %}
 6969 
 6970 // Convert oop pointer into compressed form
 6971 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 6972   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 6973   match(Set dst (EncodeP src));
 6974   effect(KILL cr);
 6975   format %{ "encode_heap_oop $dst,$src" %}
 6976   ins_encode %{
 6977     Register s = $src$$Register;
 6978     Register d = $dst$$Register;
 6979     if (s != d) {
 6980       __ movq(d, s);
 6981     }
 6982     __ encode_heap_oop(d);
 6983   %}
 6984   ins_pipe(ialu_reg_long);
 6985 %}
 6986 
 6987 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 6988   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 6989   match(Set dst (EncodeP src));
 6990   effect(KILL cr);
 6991   format %{ "encode_heap_oop_not_null $dst,$src" %}
 6992   ins_encode %{
 6993     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 6994   %}
 6995   ins_pipe(ialu_reg_long);
 6996 %}
 6997 
 6998 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 6999   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 7000             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 7001   match(Set dst (DecodeN src));
 7002   effect(KILL cr);
 7003   format %{ "decode_heap_oop $dst,$src" %}
 7004   ins_encode %{
 7005     Register s = $src$$Register;
 7006     Register d = $dst$$Register;
 7007     if (s != d) {
 7008       __ movq(d, s);
 7009     }
 7010     __ decode_heap_oop(d);
 7011   %}
 7012   ins_pipe(ialu_reg_long);
 7013 %}
 7014 
 7015 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 7016   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 7017             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 7018   match(Set dst (DecodeN src));
 7019   effect(KILL cr);
 7020   format %{ "decode_heap_oop_not_null $dst,$src" %}
 7021   ins_encode %{
 7022     Register s = $src$$Register;
 7023     Register d = $dst$$Register;
 7024     if (s != d) {
 7025       __ decode_heap_oop_not_null(d, s);
 7026     } else {
 7027       __ decode_heap_oop_not_null(d);
 7028     }
 7029   %}
 7030   ins_pipe(ialu_reg_long);
 7031 %}
 7032 
 7033 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 7034   match(Set dst (EncodePKlass src));
 7035   effect(TEMP dst, KILL cr);
 7036   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 7037   ins_encode %{
 7038     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7039   %}
 7040   ins_pipe(ialu_reg_long);
 7041 %}
 7042 
 7043 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 7044   match(Set dst (DecodeNKlass src));
 7045   effect(TEMP dst, KILL cr);
 7046   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 7047   ins_encode %{
 7048     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7049   %}
 7050   ins_pipe(ialu_reg_long);
 7051 %}
 7052 
 7053 //----------Conditional Move---------------------------------------------------
 7054 // Jump
 7055 // dummy instruction for generating temp registers
 7056 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 7057   match(Jump (LShiftL switch_val shift));
 7058   ins_cost(350);
 7059   predicate(false);
 7060   effect(TEMP dest);
 7061 
 7062   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7063             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 7064   ins_encode %{
 7065     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7066     // to do that and the compiler is using that register as one it can allocate.
 7067     // So we build it all by hand.
 7068     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 7069     // ArrayAddress dispatch(table, index);
 7070     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 7071     __ lea($dest$$Register, $constantaddress);
 7072     __ jmp(dispatch);
 7073   %}
 7074   ins_pipe(pipe_jmp);
 7075 %}
 7076 
 7077 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 7078   match(Jump (AddL (LShiftL switch_val shift) offset));
 7079   ins_cost(350);
 7080   effect(TEMP dest);
 7081 
 7082   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7083             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 7084   ins_encode %{
 7085     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7086     // to do that and the compiler is using that register as one it can allocate.
 7087     // So we build it all by hand.
 7088     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7089     // ArrayAddress dispatch(table, index);
 7090     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7091     __ lea($dest$$Register, $constantaddress);
 7092     __ jmp(dispatch);
 7093   %}
 7094   ins_pipe(pipe_jmp);
 7095 %}
 7096 
 7097 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 7098   match(Jump switch_val);
 7099   ins_cost(350);
 7100   effect(TEMP dest);
 7101 
 7102   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7103             "jmp     [$dest + $switch_val]\n\t" %}
 7104   ins_encode %{
 7105     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7106     // to do that and the compiler is using that register as one it can allocate.
 7107     // So we build it all by hand.
 7108     // Address index(noreg, switch_reg, Address::times_1);
 7109     // ArrayAddress dispatch(table, index);
 7110     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 7111     __ lea($dest$$Register, $constantaddress);
 7112     __ jmp(dispatch);
 7113   %}
 7114   ins_pipe(pipe_jmp);
 7115 %}
 7116 
 7117 // Conditional move
 7118 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7119 %{
 7120   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7121   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7122 
 7123   ins_cost(100); // XXX
 7124   format %{ "setbn$cop $dst\t# signed, int" %}
 7125   ins_encode %{
 7126     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7127     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7128   %}
 7129   ins_pipe(ialu_reg);
 7130 %}
 7131 
 7132 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 7133 %{
 7134   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7135 
 7136   ins_cost(200); // XXX
 7137   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7138   ins_encode %{
 7139     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7140   %}
 7141   ins_pipe(pipe_cmov_reg);
 7142 %}
 7143 
 7144 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7145 %{
 7146   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7147   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7148 
 7149   ins_cost(100); // XXX
 7150   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7151   ins_encode %{
 7152     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7153     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7154   %}
 7155   ins_pipe(ialu_reg);
 7156 %}
 7157 
 7158 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 7159   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7160 
 7161   ins_cost(200); // XXX
 7162   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7163   ins_encode %{
 7164     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7165   %}
 7166   ins_pipe(pipe_cmov_reg);
 7167 %}
 7168 
 7169 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7170 %{
 7171   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7172   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7173 
 7174   ins_cost(100); // XXX
 7175   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7176   ins_encode %{
 7177     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7178     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7179   %}
 7180   ins_pipe(ialu_reg);
 7181 %}
 7182 
// CMoveI with register source for the rFlagsRegUCF / cmpOpUCF operand
// classes. Has no encoding of its own: it expands to cmovI_regU with the
// same operands.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
 7190 
 7191 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 7192   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7193   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7194 
 7195   ins_cost(200); // XXX
 7196   format %{ "cmovpl  $dst, $src\n\t"
 7197             "cmovnel $dst, $src" %}
 7198   ins_encode %{
 7199     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7200     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7201   %}
 7202   ins_pipe(pipe_cmov_reg);
 7203 %}
 7204 
 7205 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7206 // inputs of the CMove
 7207 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 7208   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7209   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7210 
 7211   ins_cost(200); // XXX
 7212   format %{ "cmovpl  $dst, $src\n\t"
 7213             "cmovnel $dst, $src" %}
 7214   ins_encode %{
 7215     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7216     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7217   %}
 7218   ins_pipe(pipe_cmov_reg);
 7219 %}
 7220 
 7221 // Conditional move
 7222 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 7223   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7224 
 7225   ins_cost(250); // XXX
 7226   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7227   ins_encode %{
 7228     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7229   %}
 7230   ins_pipe(pipe_cmov_mem);
 7231 %}
 7232 
 7233 // Conditional move
 7234 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 7235 %{
 7236   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7237 
 7238   ins_cost(250); // XXX
 7239   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7240   ins_encode %{
 7241     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7242   %}
 7243   ins_pipe(pipe_cmov_mem);
 7244 %}
 7245 
// CMoveI with a folded LoadI source for the rFlagsRegUCF / cmpOpUCF operand
// classes. Has no encoding of its own: it expands to cmovI_memU with the
// same operands.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
 7253 
 7254 // Conditional move
 7255 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 7256 %{
 7257   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7258 
 7259   ins_cost(200); // XXX
 7260   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 7261   ins_encode %{
 7262     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7263   %}
 7264   ins_pipe(pipe_cmov_reg);
 7265 %}
 7266 
 7267 // Conditional move
 7268 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 7269 %{
 7270   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7271 
 7272   ins_cost(200); // XXX
 7273   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 7274   ins_encode %{
 7275     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7276   %}
 7277   ins_pipe(pipe_cmov_reg);
 7278 %}
 7279 
// CMoveN with register source for the rFlagsRegUCF / cmpOpUCF operand
// classes. Has no encoding of its own: it expands to cmovN_regU with the
// same operands.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
 7287 
 7288 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 7289   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7290   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7291 
 7292   ins_cost(200); // XXX
 7293   format %{ "cmovpl  $dst, $src\n\t"
 7294             "cmovnel $dst, $src" %}
 7295   ins_encode %{
 7296     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7297     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7298   %}
 7299   ins_pipe(pipe_cmov_reg);
 7300 %}
 7301 
 7302 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7303 // inputs of the CMove
 7304 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 7305   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7306   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 7307 
 7308   ins_cost(200); // XXX
 7309   format %{ "cmovpl  $dst, $src\n\t"
 7310             "cmovnel $dst, $src" %}
 7311   ins_encode %{
 7312     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7313     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7314   %}
 7315   ins_pipe(pipe_cmov_reg);
 7316 %}
 7317 
 7318 // Conditional move
 7319 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 7320 %{
 7321   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7322 
 7323   ins_cost(200); // XXX
 7324   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 7325   ins_encode %{
 7326     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7327   %}
 7328   ins_pipe(pipe_cmov_reg);  // XXX
 7329 %}
 7330 
 7331 // Conditional move
 7332 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 7333 %{
 7334   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7335 
 7336   ins_cost(200); // XXX
 7337   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 7338   ins_encode %{
 7339     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7340   %}
 7341   ins_pipe(pipe_cmov_reg); // XXX
 7342 %}
 7343 
// CMoveP with register source for the rFlagsRegUCF / cmpOpUCF operand
// classes. Has no encoding of its own: it expands to cmovP_regU with the
// same operands.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
 7351 
 7352 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 7353   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7354   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7355 
 7356   ins_cost(200); // XXX
 7357   format %{ "cmovpq  $dst, $src\n\t"
 7358             "cmovneq $dst, $src" %}
 7359   ins_encode %{
 7360     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7361     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7362   %}
 7363   ins_pipe(pipe_cmov_reg);
 7364 %}
 7365 
 7366 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7367 // inputs of the CMove
 7368 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 7369   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7370   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 7371 
 7372   ins_cost(200); // XXX
 7373   format %{ "cmovpq  $dst, $src\n\t"
 7374             "cmovneq $dst, $src" %}
 7375   ins_encode %{
 7376     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7377     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7378   %}
 7379   ins_pipe(pipe_cmov_reg);
 7380 %}
 7381 
 7382 // DISABLED: Requires the ADLC to emit a bottom_type call that
 7383 // correctly meets the two pointer arguments; one is an incoming
 7384 // register but the other is a memory operand.  ALSO appears to
 7385 // be buggy with implicit null checks.
 7386 //
 7387 //// Conditional move
 7388 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
 7389 //%{
 7390 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7391 //  ins_cost(250);
 7392 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7393 //  opcode(0x0F,0x40);
 7394 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7395 //  ins_pipe( pipe_cmov_mem );
 7396 //%}
 7397 //
 7398 //// Conditional move
 7399 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
 7400 //%{
 7401 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7402 //  ins_cost(250);
 7403 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7404 //  opcode(0x0F,0x40);
 7405 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7406 //  ins_pipe( pipe_cmov_mem );
 7407 //%}
 7408 
 7409 instruct cmovL_imm_01(rRegL dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7410 %{
 7411   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7412   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7413 
 7414   ins_cost(100); // XXX
 7415   format %{ "setbn$cop $dst\t# signed, long" %}
 7416   ins_encode %{
 7417     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7418     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7419   %}
 7420   ins_pipe(ialu_reg);
 7421 %}
 7422 
 7423 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 7424 %{
 7425   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7426 
 7427   ins_cost(200); // XXX
 7428   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7429   ins_encode %{
 7430     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7431   %}
 7432   ins_pipe(pipe_cmov_reg);  // XXX
 7433 %}
 7434 
 7435 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 7436 %{
 7437   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7438 
 7439   ins_cost(200); // XXX
 7440   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7441   ins_encode %{
 7442     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7443   %}
 7444   ins_pipe(pipe_cmov_mem);  // XXX
 7445 %}
 7446 
 7447 instruct cmovL_imm_01U(rRegL dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7448 %{
 7449   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7450   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7451 
 7452   ins_cost(100); // XXX
 7453   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7454   ins_encode %{
 7455     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7456     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7457   %}
 7458   ins_pipe(ialu_reg);
 7459 %}
 7460 
 7461 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 7462 %{
 7463   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7464 
 7465   ins_cost(200); // XXX
 7466   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7467   ins_encode %{
 7468     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7469   %}
 7470   ins_pipe(pipe_cmov_reg); // XXX
 7471 %}
 7472 
 7473 instruct cmovL_imm_01UCF(rRegL dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7474 %{
 7475   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7476   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7477 
 7478   ins_cost(100); // XXX
 7479   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7480   ins_encode %{
 7481     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7482     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7483   %}
 7484   ins_pipe(ialu_reg);
 7485 %}
 7486 
// CMoveL with register source for the rFlagsRegUCF / cmpOpUCF operand
// classes. Has no encoding of its own: it expands to cmovL_regU with the
// same operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
 7494 
 7495 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 7496   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7497   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7498 
 7499   ins_cost(200); // XXX
 7500   format %{ "cmovpq  $dst, $src\n\t"
 7501             "cmovneq $dst, $src" %}
 7502   ins_encode %{
 7503     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7504     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7505   %}
 7506   ins_pipe(pipe_cmov_reg);
 7507 %}
 7508 
 7509 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7510 // inputs of the CMove
 7511 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 7512   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7513   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7514 
 7515   ins_cost(200); // XXX
 7516   format %{ "cmovpq  $dst, $src\n\t"
 7517             "cmovneq $dst, $src" %}
 7518   ins_encode %{
 7519     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7520     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7521   %}
 7522   ins_pipe(pipe_cmov_reg);
 7523 %}
 7524 
 7525 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 7526 %{
 7527   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7528 
 7529   ins_cost(200); // XXX
 7530   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7531   ins_encode %{
 7532     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7533   %}
 7534   ins_pipe(pipe_cmov_mem); // XXX
 7535 %}
 7536 
// CMoveL with a folded LoadL source for the rFlagsRegUCF / cmpOpUCF operand
// classes. Has no encoding of its own: it expands to cmovL_memU with the
// same operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
 7544 
 7545 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 7546 %{
 7547   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7548 
 7549   ins_cost(200); // XXX
 7550   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7551             "movss     $dst, $src\n"
 7552     "skip:" %}
 7553   ins_encode %{
 7554     Label Lskip;
 7555     // Invert sense of branch from sense of CMOV
 7556     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7557     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7558     __ bind(Lskip);
 7559   %}
 7560   ins_pipe(pipe_slow);
 7561 %}
 7562 
 7563 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
 7564 // %{
 7565 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
 7566 
 7567 //   ins_cost(200); // XXX
 7568 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7569 //             "movss     $dst, $src\n"
 7570 //     "skip:" %}
 7571 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
 7572 //   ins_pipe(pipe_slow);
 7573 // %}
 7574 
 7575 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 7576 %{
 7577   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7578 
 7579   ins_cost(200); // XXX
 7580   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 7581             "movss     $dst, $src\n"
 7582     "skip:" %}
 7583   ins_encode %{
 7584     Label Lskip;
 7585     // Invert sense of branch from sense of CMOV
 7586     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7587     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7588     __ bind(Lskip);
 7589   %}
 7590   ins_pipe(pipe_slow);
 7591 %}
 7592 
// CMoveF for the rFlagsRegUCF / cmpOpUCF operand classes. Has no encoding of
// its own: it expands to cmovF_regU with the same operands.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
 7600 
 7601 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 7602 %{
        // Conditional move of a double register under a signed condition
        // ($cop evaluated against $cr). Encoded as a conditional branch around
        // a register-to-register move rather than as a cmov instruction.
 7603   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7604 
 7605   ins_cost(200); // XXX
 7606   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 7607             "movsd     $dst, $src\n"
 7608     "skip:" %}
 7609   ins_encode %{
 7610     Label Lskip;
 7611     // Invert sense of branch from sense of CMOV
          // ($cop$$cmpcode ^ 1 flips the low bit of the condition code, so the
          // move below is skipped exactly when $cop does not hold.)
 7612     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7613     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7614     __ bind(Lskip);
 7615   %}
 7616   ins_pipe(pipe_slow);
 7617 %}
 7618 
 7619 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 7620 %{
        // Conditional move of a double register under an unsigned condition
        // ($cop evaluated against $cr). Encoded as a conditional branch around
        // a register-to-register move rather than as a cmov instruction.
 7621   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7622 
 7623   ins_cost(200); // XXX
 7624   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 7625             "movsd     $dst, $src\n"
 7626     "skip:" %}
 7627   ins_encode %{
 7628     Label Lskip;
 7629     // Invert sense of branch from sense of CMOV
          // ($cop$$cmpcode ^ 1 flips the low bit of the condition code, so the
          // move below is skipped exactly when $cop does not hold.)
 7630     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7631     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7632     __ bind(Lskip);
 7633   %}
 7634   ins_pipe(pipe_slow);
 7635 %}
 7636 
 7637 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
        // Same CMoveD pattern for the cmpOpUCF / rFlagsRegUCF operand classes.
        // The rule has no encoding of its own; it expands to cmovD_regU.
 7638   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7639   ins_cost(200);
 7640   expand %{
 7641     cmovD_regU(cop, cr, dst, src);
 7642   %}
 7643 %}
 7644 
 7645 //----------Arithmetic Instructions--------------------------------------------
 7646 //----------Addition Instructions----------------------------------------------
 7647 
 7648 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 7649 %{
        // Int add, register form: dst = dst + src (two-address, dst is both
        // input and result). The flags register is declared killed.
 7650   match(Set dst (AddI dst src));
 7651   effect(KILL cr);
 7652 
 7653   format %{ "addl    $dst, $src\t# int" %}
 7654   ins_encode %{
 7655     __ addl($dst$$Register, $src$$Register);
 7656   %}
 7657   ins_pipe(ialu_reg_reg);
 7658 %}
 7659 
 7660 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 7661 %{
        // Int add, immediate form: dst = dst + constant. The flags register
        // is declared killed.
 7662   match(Set dst (AddI dst src));
 7663   effect(KILL cr);
 7664 
 7665   format %{ "addl    $dst, $src\t# int" %}
 7666   ins_encode %{
 7667     __ addl($dst$$Register, $src$$constant);
 7668   %}
 7669   ins_pipe( ialu_reg );
 7670 %}
 7671 
 7672 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 7673 %{
        // Int add with the second operand loaded from memory: the LoadI is
        // folded into the addl as a memory source operand.
 7674   match(Set dst (AddI dst (LoadI src)));
 7675   effect(KILL cr);
 7676 
 7677   ins_cost(150); // XXX
 7678   format %{ "addl    $dst, $src\t# int" %}
 7679   ins_encode %{
 7680     __ addl($dst$$Register, $src$$Address);
 7681   %}
 7682   ins_pipe(ialu_reg_mem);
 7683 %}
 7684 
 7685 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 7686 %{
        // Read-modify-write int add: load from dst, add src, store back to the
        // same dst, all folded into one addl with a memory destination.
 7687   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7688   effect(KILL cr);
 7689 
 7690   ins_cost(150); // XXX
 7691   format %{ "addl    $dst, $src\t# int" %}
 7692   ins_encode %{
 7693     __ addl($dst$$Address, $src$$Register);
 7694   %}
 7695   ins_pipe(ialu_mem_reg);
 7696 %}
 7697 
 7698 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 7699 %{
        // Read-modify-write int add of a constant: load from dst, add the
        // immediate, store back to the same dst, folded into one addl.
 7700   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7701   effect(KILL cr);
 7702 
 7703   ins_cost(125); // XXX
 7704   format %{ "addl    $dst, $src\t# int" %}
 7705   ins_encode %{
 7706     __ addl($dst$$Address, $src$$constant);
 7707   %}
 7708   ins_pipe(ialu_mem_imm);
 7709 %}
 7710 
 7711 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 7712 %{
        // Int increment of a register: matches AddI with the immI_1 operand
        // (src is only used for matching). Enabled only when UseIncDec is set.
 7713   predicate(UseIncDec);
 7714   match(Set dst (AddI dst src));
 7715   effect(KILL cr);
 7716 
 7717   format %{ "incl    $dst\t# int" %}
 7718   ins_encode %{
 7719     __ incrementl($dst$$Register);
 7720   %}
 7721   ins_pipe(ialu_reg);
 7722 %}
 7723 
 7724 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 7725 %{
        // Int increment of a memory location: load/add-1/store on the same
        // address folded into one incrementl. Enabled only when UseIncDec is set.
 7726   predicate(UseIncDec);
 7727   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7728   effect(KILL cr);
 7729 
 7730   ins_cost(125); // XXX
 7731   format %{ "incl    $dst\t# int" %}
 7732   ins_encode %{
 7733     __ incrementl($dst$$Address);
 7734   %}
 7735   ins_pipe(ialu_mem_imm);
 7736 %}
 7737 
 7738 // Matches AddI with a -1 immediate: the ideal graph is expected to represent "x - 1" as "x + (-1)" (XXX confirm).
 7739 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
 7740 %{
        // Int decrement of a register: matches AddI with the immI_M1 operand
        // (src is only used for matching). Enabled only when UseIncDec is set.
 7741   predicate(UseIncDec);
 7742   match(Set dst (AddI dst src));
 7743   effect(KILL cr);
 7744 
 7745   format %{ "decl    $dst\t# int" %}
 7746   ins_encode %{
 7747     __ decrementl($dst$$Register);
 7748   %}
 7749   ins_pipe(ialu_reg);
 7750 %}
 7751 
 7752 // Matches AddI with a -1 immediate: the ideal graph is expected to represent "x - 1" as "x + (-1)" (XXX confirm).
 7753 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
 7754 %{
        // Int decrement of a memory location: load/add(-1)/store on the same
        // address folded into one decrementl. Enabled only when UseIncDec is set.
 7755   predicate(UseIncDec);
 7756   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7757   effect(KILL cr);
 7758 
 7759   ins_cost(125); // XXX
 7760   format %{ "decl    $dst\t# int" %}
 7761   ins_encode %{
 7762     __ decrementl($dst$$Address);
 7763   %}
 7764   ins_pipe(ialu_mem_imm);
 7765 %}
 7766 
 7767 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
 7768 %{
        // Folds (index << scale) + disp into a single leal with no base
        // register. Selected only when VM_Version::supports_fast_2op_lea().
        // No flags operand is declared for this rule.
 7769   predicate(VM_Version::supports_fast_2op_lea());
 7770   match(Set dst (AddI (LShiftI index scale) disp));
 7771 
 7772   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
 7773   ins_encode %{
          // The matched shift count is reinterpreted as the address scale factor.
 7774     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7775     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7776   %}
 7777   ins_pipe(ialu_reg_reg);
 7778 %}
 7779 
 7780 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
 7781 %{
        // Folds (base + index) + disp into a single leal (scale times_1).
        // Selected only when VM_Version::supports_fast_3op_lea().
        // No flags operand is declared for this rule.
 7782   predicate(VM_Version::supports_fast_3op_lea());
 7783   match(Set dst (AddI (AddI base index) disp));
 7784 
 7785   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
 7786   ins_encode %{
 7787     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7788   %}
 7789   ins_pipe(ialu_reg_reg);
 7790 %}
 7791 
 7792 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
 7793 %{
        // Folds base + (index << scale) into a single leal without displacement.
        // Selected only when VM_Version::supports_fast_2op_lea().
        // NOTE(review): base is restricted to no_rbp_r13_RegI, presumably
        // because rbp/r13 as a base register require a displacement in the
        // encoding -- confirm against the ISA manual.
 7794   predicate(VM_Version::supports_fast_2op_lea());
 7795   match(Set dst (AddI base (LShiftI index scale)));
 7796 
 7797   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
 7798   ins_encode %{
          // The matched shift count is reinterpreted as the address scale factor.
 7799     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7800     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
 7801   %}
 7802   ins_pipe(ialu_reg_reg);
 7803 %}
 7804 
 7805 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
 7806 %{
        // Folds (base + (index << scale)) + disp into a single leal.
        // Selected only when VM_Version::supports_fast_3op_lea().
        // No flags operand is declared for this rule.
 7807   predicate(VM_Version::supports_fast_3op_lea());
 7808   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
 7809 
 7810   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
 7811   ins_encode %{
          // The matched shift count is reinterpreted as the address scale factor.
 7812     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7813     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 7814   %}
 7815   ins_pipe(ialu_reg_reg);
 7816 %}
 7817 
 7818 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 7819 %{
        // Long add, register form: dst = dst + src (two-address, dst is both
        // input and result). The flags register is declared killed.
 7820   match(Set dst (AddL dst src));
 7821   effect(KILL cr);
 7822 
 7823   format %{ "addq    $dst, $src\t# long" %}
 7824   ins_encode %{
 7825     __ addq($dst$$Register, $src$$Register);
 7826   %}
 7827   ins_pipe(ialu_reg_reg);
 7828 %}
 7829 
 7830 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 7831 %{
        // Long add, immediate form: dst = dst + constant, where the constant
        // must match the immL32 operand class. Flags are declared killed.
 7832   match(Set dst (AddL dst src));
 7833   effect(KILL cr);
 7834 
 7835   format %{ "addq    $dst, $src\t# long" %}
 7836   ins_encode %{
 7837     __ addq($dst$$Register, $src$$constant);
 7838   %}
 7839   ins_pipe( ialu_reg );
 7840 %}
 7841 
 7842 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 7843 %{
        // Long add with the second operand loaded from memory: the LoadL is
        // folded into the addq as a memory source operand.
 7844   match(Set dst (AddL dst (LoadL src)));
 7845   effect(KILL cr);
 7846 
 7847   ins_cost(150); // XXX
 7848   format %{ "addq    $dst, $src\t# long" %}
 7849   ins_encode %{
 7850     __ addq($dst$$Register, $src$$Address);
 7851   %}
 7852   ins_pipe(ialu_reg_mem);
 7853 %}
 7854 
 7855 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 7856 %{
        // Read-modify-write long add: load from dst, add src, store back to the
        // same dst, all folded into one addq with a memory destination.
 7857   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7858   effect(KILL cr);
 7859 
 7860   ins_cost(150); // XXX
 7861   format %{ "addq    $dst, $src\t# long" %}
 7862   ins_encode %{
 7863     __ addq($dst$$Address, $src$$Register);
 7864   %}
 7865   ins_pipe(ialu_mem_reg);
 7866 %}
 7867 
 7868 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
 7869 %{
        // Read-modify-write long add of an immL32 constant: load from dst, add
        // the immediate, store back to the same dst, folded into one addq.
 7870   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7871   effect(KILL cr);
 7872 
 7873   ins_cost(125); // XXX
 7874   format %{ "addq    $dst, $src\t# long" %}
 7875   ins_encode %{
 7876     __ addq($dst$$Address, $src$$constant);
 7877   %}
 7878   ins_pipe(ialu_mem_imm);
 7879 %}
 7880 
 7881 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
 7882 %{
        // Long increment of a register: matches AddL with the immL1 operand
        // (src is only used for matching). Enabled only when UseIncDec is set.
        // dst must be a long register operand (rRegL): the rule matches AddL
        // and emits the 64-bit incrementq, like the other long rules here.
 7883   predicate(UseIncDec);
 7884   match(Set dst (AddL dst src));
 7885   effect(KILL cr);
 7886 
 7887   format %{ "incq    $dst\t# long" %}
 7888   ins_encode %{
 7889     __ incrementq($dst$$Register);
 7890   %}
 7891   ins_pipe(ialu_reg);
 7892 %}
 7893 
 7894 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
 7895 %{
        // Long increment of a memory location: load/add-1/store on the same
        // address folded into one incrementq. Enabled only when UseIncDec is set.
 7896   predicate(UseIncDec);
 7897   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7898   effect(KILL cr);
 7899 
 7900   ins_cost(125); // XXX
 7901   format %{ "incq    $dst\t# long" %}
 7902   ins_encode %{
 7903     __ incrementq($dst$$Address);
 7904   %}
 7905   ins_pipe(ialu_mem_imm);
 7906 %}
 7907 
 7908 // Matches AddL with a -1 immediate: the ideal graph is expected to represent "x - 1" as "x + (-1)" (XXX confirm).
 7909 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
 7910 %{
        // Long decrement of a register: matches AddL with the immL_M1 operand
        // (src is only used for matching). Enabled only when UseIncDec is set.
 7911   predicate(UseIncDec);
 7912   match(Set dst (AddL dst src));
 7913   effect(KILL cr);
 7914 
 7915   format %{ "decq    $dst\t# long" %}
 7916   ins_encode %{
 7917     __ decrementq($dst$$Register);
 7918   %}
 7919   ins_pipe(ialu_reg);
 7920 %}
 7921 
 7922 // Matches AddL with a -1 immediate: the ideal graph is expected to represent "x - 1" as "x + (-1)" (XXX confirm).
 7923 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
 7924 %{
        // Long decrement of a memory location: load/add(-1)/store on the same
        // address folded into one decrementq. Enabled only when UseIncDec is set.
 7925   predicate(UseIncDec);
 7926   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7927   effect(KILL cr);
 7928 
 7929   ins_cost(125); // XXX
 7930   format %{ "decq    $dst\t# long" %}
 7931   ins_encode %{
 7932     __ decrementq($dst$$Address);
 7933   %}
 7934   ins_pipe(ialu_mem_imm);
 7935 %}
 7936 
 7937 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
 7938 %{
        // Folds (index << scale) + disp into a single leaq with no base
        // register. Selected only when VM_Version::supports_fast_2op_lea().
        // No flags operand is declared for this rule.
 7939   predicate(VM_Version::supports_fast_2op_lea());
 7940   match(Set dst (AddL (LShiftL index scale) disp));
 7941 
 7942   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
 7943   ins_encode %{
          // The matched shift count is reinterpreted as the address scale factor.
 7944     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7945     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7946   %}
 7947   ins_pipe(ialu_reg_reg);
 7948 %}
 7949 
 7950 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
 7951 %{
        // Folds (base + index) + disp into a single leaq (scale times_1).
        // Selected only when VM_Version::supports_fast_3op_lea().
        // No flags operand is declared for this rule.
 7952   predicate(VM_Version::supports_fast_3op_lea());
 7953   match(Set dst (AddL (AddL base index) disp));
 7954 
 7955   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
 7956   ins_encode %{
 7957     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7958   %}
 7959   ins_pipe(ialu_reg_reg);
 7960 %}
 7961 
 7962 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
 7963 %{
        // Folds base + (index << scale) into a single leaq without displacement.
        // Selected only when VM_Version::supports_fast_2op_lea().
        // NOTE(review): base is restricted to no_rbp_r13_RegL, presumably
        // because rbp/r13 as a base register require a displacement in the
        // encoding -- confirm against the ISA manual.
 7964   predicate(VM_Version::supports_fast_2op_lea());
 7965   match(Set dst (AddL base (LShiftL index scale)));
 7966 
 7967   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
 7968   ins_encode %{
          // The matched shift count is reinterpreted as the address scale factor.
 7969     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7970     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
 7971   %}
 7972   ins_pipe(ialu_reg_reg);
 7973 %}
 7974 
 7975 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
 7976 %{
        // Folds (base + (index << scale)) + disp into a single leaq.
        // Selected only when VM_Version::supports_fast_3op_lea().
        // No flags operand is declared for this rule.
 7977   predicate(VM_Version::supports_fast_3op_lea());
 7978   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
 7979 
 7980   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
 7981   ins_encode %{
          // The matched shift count is reinterpreted as the address scale factor.
 7982     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7983     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 7984   %}
 7985   ins_pipe(ialu_reg_reg);
 7986 %}
 7987 
 7988 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
 7989 %{
        // Pointer add: dst (pointer register) += src (long register), emitted
        // as a 64-bit addq. The flags register is declared killed.
 7990   match(Set dst (AddP dst src));
 7991   effect(KILL cr);
 7992 
 7993   format %{ "addq    $dst, $src\t# ptr" %}
 7994   ins_encode %{
 7995     __ addq($dst$$Register, $src$$Register);
 7996   %}
 7997   ins_pipe(ialu_reg_reg);
 7998 %}
 7999 
 8000 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
 8001 %{
        // Pointer add of an immL32 constant: dst (pointer register) += constant,
        // emitted as a 64-bit addq. The flags register is declared killed.
 8002   match(Set dst (AddP dst src));
 8003   effect(KILL cr);
 8004 
 8005   format %{ "addq    $dst, $src\t# ptr" %}
 8006   ins_encode %{
 8007     __ addq($dst$$Register, $src$$constant);
 8008   %}
 8009   ins_pipe( ialu_reg );
 8010 %}
 8011 
 8012 // XXX addP mem ops ????
 8013 
 8014 instruct checkCastPP(rRegP dst)
 8015 %{
        // Matches CheckCastPP in place on dst. Emits no machine code: the
        // encoding is empty and the instruction size is declared as 0.
 8016   match(Set dst (CheckCastPP dst));
 8017 
 8018   size(0);
 8019   format %{ "# checkcastPP of $dst" %}
 8020   ins_encode(/* empty encoding */);
 8021   ins_pipe(empty);
 8022 %}
 8023 
 8024 instruct castPP(rRegP dst)
 8025 %{
        // Matches CastPP in place on dst. Emits no machine code: the encoding
        // is empty and the instruction size is declared as 0.
 8026   match(Set dst (CastPP dst));
 8027 
 8028   size(0);
 8029   format %{ "# castPP of $dst" %}
 8030   ins_encode(/* empty encoding */);
 8031   ins_pipe(empty);
 8032 %}
 8033 
 8034 instruct castII(rRegI dst)
 8035 %{
        // Matches CastII in place on dst. Emits no machine code: empty
        // encoding, size 0, and an explicit cost of 0.
 8036   match(Set dst (CastII dst));
 8037 
 8038   size(0);
 8039   format %{ "# castII of $dst" %}
 8040   ins_encode(/* empty encoding */);
 8041   ins_cost(0);
 8042   ins_pipe(empty);
 8043 %}
 8044 
 8045 instruct castLL(rRegL dst)
 8046 %{
        // Matches CastLL in place on dst. Emits no machine code: empty
        // encoding, size 0, and an explicit cost of 0.
 8047   match(Set dst (CastLL dst));
 8048 
 8049   size(0);
 8050   format %{ "# castLL of $dst" %}
 8051   ins_encode(/* empty encoding */);
 8052   ins_cost(0);
 8053   ins_pipe(empty);
 8054 %}
 8055 
 8056 instruct castFF(regF dst)
 8057 %{
        // Matches CastFF in place on dst. Emits no machine code: empty
        // encoding, size 0, and an explicit cost of 0.
 8058   match(Set dst (CastFF dst));
 8059 
 8060   size(0);
 8061   format %{ "# castFF of $dst" %}
 8062   ins_encode(/* empty encoding */);
 8063   ins_cost(0);
 8064   ins_pipe(empty);
 8065 %}
 8066 
 8067 instruct castDD(regD dst)
 8068 %{
        // Matches CastDD in place on dst. Emits no machine code: empty
        // encoding, size 0, and an explicit cost of 0.
 8069   match(Set dst (CastDD dst));
 8070 
 8071   size(0);
 8072   format %{ "# castDD of $dst" %}
 8073   ins_encode(/* empty encoding */);
 8074   ins_cost(0);
 8075   ins_pipe(empty);
 8076 %}
 8077 
 8078 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 8079 instruct compareAndSwapP(rRegI res,
 8080                          memory mem_ptr,
 8081                          rax_RegP oldval, rRegP newval,
 8082                          rFlagsReg cr)
 8083 %{
        // Pointer compare-and-swap producing an int success flag in res.
        // Handles both CompareAndSwapP and WeakCompareAndSwapP, but only when
        // supports_cx8() holds and the node's barrier_data() is 0.
        // oldval is pinned to the rax pointer class and declared killed.
 8084   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8085   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 8086   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 8087   effect(KILL cr, KILL oldval);
 8088 
 8089   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8090             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8091             "sete    $res\n\t"
 8092             "movzbl  $res, $res" %}
 8093   ins_encode %{
          // Locked 64-bit cmpxchg, then materialize the "equal" condition as a
          // byte in res and zero-extend it.
 8094     __ lock();
 8095     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8096     __ setb(Assembler::equal, $res$$Register);
 8097     __ movzbl($res$$Register, $res$$Register);
 8098   %}
 8099   ins_pipe( pipe_cmpxchg );
 8100 %}
 8101 
 8102 instruct compareAndSwapL(rRegI res,
 8103                          memory mem_ptr,
 8104                          rax_RegL oldval, rRegL newval,
 8105                          rFlagsReg cr)
 8106 %{
        // Long compare-and-swap producing an int success flag in res.
        // Handles both CompareAndSwapL and WeakCompareAndSwapL when
        // supports_cx8() holds. oldval is pinned to the rax long class and
        // declared killed.
 8107   predicate(VM_Version::supports_cx8());
 8108   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 8109   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 8110   effect(KILL cr, KILL oldval);
 8111 
 8112   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8113             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8114             "sete    $res\n\t"
 8115             "movzbl  $res, $res" %}
 8116   ins_encode %{
          // Locked 64-bit cmpxchg, then materialize the "equal" condition as a
          // byte in res and zero-extend it.
 8117     __ lock();
 8118     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8119     __ setb(Assembler::equal, $res$$Register);
 8120     __ movzbl($res$$Register, $res$$Register);
 8121   %}
 8122   ins_pipe( pipe_cmpxchg );
 8123 %}
 8124 
 8125 instruct compareAndSwapI(rRegI res,
 8126                          memory mem_ptr,
 8127                          rax_RegI oldval, rRegI newval,
 8128                          rFlagsReg cr)
 8129 %{
        // Int compare-and-swap producing an int success flag in res.
        // Handles both CompareAndSwapI and WeakCompareAndSwapI. oldval is
        // pinned to the rax int class and declared killed.
 8130   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 8131   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 8132   effect(KILL cr, KILL oldval);
 8133 
 8134   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8135             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8136             "sete    $res\n\t"
 8137             "movzbl  $res, $res" %}
 8138   ins_encode %{
          // Locked 32-bit cmpxchg, then materialize the "equal" condition as a
          // byte in res and zero-extend it.
 8139     __ lock();
 8140     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8141     __ setb(Assembler::equal, $res$$Register);
 8142     __ movzbl($res$$Register, $res$$Register);
 8143   %}
 8144   ins_pipe( pipe_cmpxchg );
 8145 %}
 8146 
 8147 instruct compareAndSwapB(rRegI res,
 8148                          memory mem_ptr,
 8149                          rax_RegI oldval, rRegI newval,
 8150                          rFlagsReg cr)
 8151 %{
        // Byte compare-and-swap producing an int success flag in res.
        // Handles both CompareAndSwapB and WeakCompareAndSwapB. oldval is
        // pinned to the rax int class and declared killed.
 8152   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 8153   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 8154   effect(KILL cr, KILL oldval);
 8155 
 8156   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8157             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8158             "sete    $res\n\t"
 8159             "movzbl  $res, $res" %}
 8160   ins_encode %{
          // Locked byte cmpxchg, then materialize the "equal" condition as a
          // byte in res and zero-extend it.
 8161     __ lock();
 8162     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 8163     __ setb(Assembler::equal, $res$$Register);
 8164     __ movzbl($res$$Register, $res$$Register);
 8165   %}
 8166   ins_pipe( pipe_cmpxchg );
 8167 %}
 8168 
 8169 instruct compareAndSwapS(rRegI res,
 8170                          memory mem_ptr,
 8171                          rax_RegI oldval, rRegI newval,
 8172                          rFlagsReg cr)
 8173 %{
        // Short compare-and-swap producing an int success flag in res.
        // Handles both CompareAndSwapS and WeakCompareAndSwapS. oldval is
        // pinned to the rax int class and declared killed.
 8174   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 8175   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 8176   effect(KILL cr, KILL oldval);
 8177 
 8178   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8179             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8180             "sete    $res\n\t"
 8181             "movzbl  $res, $res" %}
 8182   ins_encode %{
          // Locked 16-bit cmpxchg, then materialize the "equal" condition as a
          // byte in res and zero-extend it.
 8183     __ lock();
 8184     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 8185     __ setb(Assembler::equal, $res$$Register);
 8186     __ movzbl($res$$Register, $res$$Register);
 8187   %}
 8188   ins_pipe( pipe_cmpxchg );
 8189 %}
 8190 
 8191 instruct compareAndSwapN(rRegI res,
 8192                           memory mem_ptr,
 8193                           rax_RegN oldval, rRegN newval,
 8194                           rFlagsReg cr) %{
        // Narrow-pointer (rRegN operands) compare-and-swap producing an int
        // success flag in res. Handles both CompareAndSwapN and
        // WeakCompareAndSwapN. oldval is pinned to rax_RegN and declared killed.
 8195   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
 8196   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
 8197   effect(KILL cr, KILL oldval);
 8198 
 8199   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8200             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8201             "sete    $res\n\t"
 8202             "movzbl  $res, $res" %}
 8203   ins_encode %{
          // Locked 32-bit cmpxchg, then materialize the "equal" condition as a
          // byte in res and zero-extend it.
 8204     __ lock();
 8205     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8206     __ setb(Assembler::equal, $res$$Register);
 8207     __ movzbl($res$$Register, $res$$Register);
 8208   %}
 8209   ins_pipe( pipe_cmpxchg );
 8210 %}
 8211 
 8212 instruct compareAndExchangeB(
 8213                          memory mem_ptr,
 8214                          rax_RegI oldval, rRegI newval,
 8215                          rFlagsReg cr)
 8216 %{
        // Byte compare-and-exchange. The result of the node is delivered in
        // oldval itself (pinned to the rax int class), so no separate result
        // register or setb sequence is needed; only flags are declared killed.
 8217   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 8218   effect(KILL cr);
 8219 
 8220   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8221             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8222   ins_encode %{
 8223     __ lock();
 8224     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 8225   %}
 8226   ins_pipe( pipe_cmpxchg );
 8227 %}
 8228 
 8229 instruct compareAndExchangeS(
 8230                          memory mem_ptr,
 8231                          rax_RegI oldval, rRegI newval,
 8232                          rFlagsReg cr)
 8233 %{
        // Short compare-and-exchange. The result of the node is delivered in
        // oldval itself (pinned to the rax int class), so no separate result
        // register or setb sequence is needed; only flags are declared killed.
 8234   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 8235   effect(KILL cr);
 8236 
 8237   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8238             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8239   ins_encode %{
 8240     __ lock();
 8241     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 8242   %}
 8243   ins_pipe( pipe_cmpxchg );
 8244 %}
 8245 
 8246 instruct compareAndExchangeI(
 8247                          memory mem_ptr,
 8248                          rax_RegI oldval, rRegI newval,
 8249                          rFlagsReg cr)
 8250 %{
        // Int compare-and-exchange. The result of the node is delivered in
        // oldval itself (pinned to the rax int class), so no separate result
        // register or setb sequence is needed; only flags are declared killed.
 8251   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 8252   effect(KILL cr);
 8253 
 8254   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8255             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8256   ins_encode %{
 8257     __ lock();
 8258     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8259   %}
 8260   ins_pipe( pipe_cmpxchg );
 8261 %}
 8262 
 8263 instruct compareAndExchangeL(
 8264                          memory mem_ptr,
 8265                          rax_RegL oldval, rRegL newval,
 8266                          rFlagsReg cr)
 8267 %{
        // Long compare-and-exchange, available when supports_cx8() holds.
        // The result of the node is delivered in oldval itself (pinned to the
        // rax long class); only flags are declared killed.
 8268   predicate(VM_Version::supports_cx8());
 8269   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 8270   effect(KILL cr);
 8271 
 8272   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8273             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8274   ins_encode %{
 8275     __ lock();
 8276     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8277   %}
 8278   ins_pipe( pipe_cmpxchg );
 8279 %}
 8280 
 8281 instruct compareAndExchangeN(
 8282                           memory mem_ptr,
 8283                           rax_RegN oldval, rRegN newval,
 8284                           rFlagsReg cr) %{
        // Narrow-pointer (rRegN operands) compare-and-exchange using a 32-bit
        // cmpxchg. The result of the node is delivered in oldval itself
        // (pinned to rax_RegN); only flags are declared killed.
 8285   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
 8286   effect(KILL cr);
 8287 
 8288   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8289             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8290   ins_encode %{
 8291     __ lock();
 8292     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8293   %}
 8294   ins_pipe( pipe_cmpxchg );
 8295 %}
 8296 
 8297 instruct compareAndExchangeP(
 8298                          memory mem_ptr,
 8299                          rax_RegP oldval, rRegP newval,
 8300                          rFlagsReg cr)
 8301 %{
        // Pointer compare-and-exchange, selected only when supports_cx8()
        // holds and the node's barrier_data() is 0. The result of the node is
        // delivered in oldval itself (pinned to the rax pointer class).
 8302   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8303   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 8304   effect(KILL cr);
 8305 
 8306   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8307             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8308   ins_encode %{
 8309     __ lock();
 8310     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8311   %}
 8312   ins_pipe( pipe_cmpxchg );
 8313 %}
 8314 
 8315 instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
        // GetAndAddB whose result is unused (result_not_used()): the fetched
        // value is never needed, so a locked addb of the constant replaces
        // xadd. The node's value is bound to a Universe dummy operand.
 8316   predicate(n->as_LoadStore()->result_not_used());
 8317   match(Set dummy (GetAndAddB mem add));
 8318   effect(KILL cr);
 8319   format %{ "ADDB  [$mem],$add" %}
 8320   ins_encode %{
 8321     __ lock();
 8322     __ addb($mem$$Address, $add$$constant);
 8323   %}
 8324   ins_pipe( pipe_cmpxchg );
 8325 %}
 8326 
 8327 instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{
        // GetAndAddB with a used result: newval carries the addend in and is
        // also the operand the node's result is set into. Emitted as a locked
        // byte xadd.
 8328   match(Set newval (GetAndAddB mem newval));
 8329   effect(KILL cr);
 8330   format %{ "XADDB  [$mem],$newval" %}
 8331   ins_encode %{
 8332     __ lock();
 8333     __ xaddb($mem$$Address, $newval$$Register);
 8334   %}
 8335   ins_pipe( pipe_cmpxchg );
 8336 %}
 8337 
 8338 instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
        // GetAndAddS whose result is unused (result_not_used()): the fetched
        // value is never needed, so a locked addw of the constant replaces
        // xadd. The node's value is bound to a Universe dummy operand.
 8339   predicate(n->as_LoadStore()->result_not_used());
 8340   match(Set dummy (GetAndAddS mem add));
 8341   effect(KILL cr);
 8342   format %{ "ADDW  [$mem],$add" %}
 8343   ins_encode %{
 8344     __ lock();
 8345     __ addw($mem$$Address, $add$$constant);
 8346   %}
 8347   ins_pipe( pipe_cmpxchg );
 8348 %}
 8349 
 8350 instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{
        // GetAndAddS with a used result: newval carries the addend in and is
        // also the operand the node's result is set into. Emitted as a locked
        // 16-bit xadd.
 8351   match(Set newval (GetAndAddS mem newval));
 8352   effect(KILL cr);
 8353   format %{ "XADDW  [$mem],$newval" %}
 8354   ins_encode %{
 8355     __ lock();
 8356     __ xaddw($mem$$Address, $newval$$Register);
 8357   %}
 8358   ins_pipe( pipe_cmpxchg );
 8359 %}
 8360 
 8361 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
        // GetAndAddI whose result is unused (result_not_used()): the fetched
        // value is never needed, so a locked addl of the constant replaces
        // xadd. The node's value is bound to a Universe dummy operand.
 8362   predicate(n->as_LoadStore()->result_not_used());
 8363   match(Set dummy (GetAndAddI mem add));
 8364   effect(KILL cr);
 8365   format %{ "ADDL  [$mem],$add" %}
 8366   ins_encode %{
 8367     __ lock();
 8368     __ addl($mem$$Address, $add$$constant);
 8369   %}
 8370   ins_pipe( pipe_cmpxchg );
 8371 %}
 8372 
 8373 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
        // GetAndAddI with a used result: newval carries the addend in and is
        // also the operand the node's result is set into. Emitted as a locked
        // 32-bit xadd.
 8374   match(Set newval (GetAndAddI mem newval));
 8375   effect(KILL cr);
 8376   format %{ "XADDL  [$mem],$newval" %}
 8377   ins_encode %{
 8378     __ lock();
 8379     __ xaddl($mem$$Address, $newval$$Register);
 8380   %}
 8381   ins_pipe( pipe_cmpxchg );
 8382 %}
 8383 
 8384 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
        // GetAndAddL whose result is unused (result_not_used()): the fetched
        // value is never needed, so a locked addq of the immL32 constant
        // replaces xadd. The node's value is bound to a Universe dummy operand.
 8385   predicate(n->as_LoadStore()->result_not_used());
 8386   match(Set dummy (GetAndAddL mem add));
 8387   effect(KILL cr);
 8388   format %{ "ADDQ  [$mem],$add" %}
 8389   ins_encode %{
 8390     __ lock();
 8391     __ addq($mem$$Address, $add$$constant);
 8392   %}
 8393   ins_pipe( pipe_cmpxchg );
 8394 %}
 8395 
 8396 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
        // GetAndAddL with a used result: newval carries the addend in and is
        // also the operand the node's result is set into. Emitted as a locked
        // 64-bit xadd.
 8397   match(Set newval (GetAndAddL mem newval));
 8398   effect(KILL cr);
 8399   format %{ "XADDQ  [$mem],$newval" %}
 8400   ins_encode %{
 8401     __ lock();
 8402     __ xaddq($mem$$Address, $newval$$Register);
 8403   %}
 8404   ins_pipe( pipe_cmpxchg );
 8405 %}
 8406 
 8407 instruct xchgB( memory mem, rRegI newval) %{
        // GetAndSetB: byte exchange between newval and memory; the node's
        // result is set into newval. No lock() is emitted and no flags operand
        // is declared -- presumably because xchg with a memory operand is
        // implicitly locked and leaves flags alone (confirm in the ISA manual).
 8408   match(Set newval (GetAndSetB mem newval));
 8409   format %{ "XCHGB  $newval,[$mem]" %}
 8410   ins_encode %{
 8411     __ xchgb($newval$$Register, $mem$$Address);
 8412   %}
 8413   ins_pipe( pipe_cmpxchg );
 8414 %}
 8415 
 8416 instruct xchgS( memory mem, rRegI newval) %{
        // GetAndSetS: 16-bit exchange between newval and memory; the node's
        // result is set into newval. No lock() is emitted and no flags operand
        // is declared -- presumably because xchg with a memory operand is
        // implicitly locked and leaves flags alone (confirm in the ISA manual).
 8417   match(Set newval (GetAndSetS mem newval));
 8418   format %{ "XCHGW  $newval,[$mem]" %}
 8419   ins_encode %{
 8420     __ xchgw($newval$$Register, $mem$$Address);
 8421   %}
 8422   ins_pipe( pipe_cmpxchg );
 8423 %}
 8424 
 8425 instruct xchgI( memory mem, rRegI newval) %{
        // GetAndSetI: 32-bit exchange between newval and memory; the node's
        // result is set into newval. No lock() is emitted and no flags operand
        // is declared -- presumably because xchg with a memory operand is
        // implicitly locked and leaves flags alone (confirm in the ISA manual).
 8426   match(Set newval (GetAndSetI mem newval));
 8427   format %{ "XCHGL  $newval,[$mem]" %}
 8428   ins_encode %{
 8429     __ xchgl($newval$$Register, $mem$$Address);
 8430   %}
 8431   ins_pipe( pipe_cmpxchg );
 8432 %}
 8433 
 8434 instruct xchgL( memory mem, rRegL newval) %{
        // GetAndSetL: 64-bit exchange between newval and memory; the node's
        // result is set into newval. The disassembly format names the 64-bit
        // mnemonic so that it agrees with the xchgq actually emitted.
        // No lock() is emitted and no flags operand is declared -- presumably
        // because xchg with a memory operand is implicitly locked and leaves
        // flags alone (confirm in the ISA manual).
 8435   match(Set newval (GetAndSetL mem newval));
 8436   format %{ "XCHGQ  $newval,[$mem]" %}
 8437   ins_encode %{
 8438     __ xchgq($newval$$Register, $mem$$Address);
 8439   %}
 8440   ins_pipe( pipe_cmpxchg );
 8441 %}
 8442 
 8443 instruct xchgP( memory mem, rRegP newval) %{
        // GetAndSetP: 64-bit exchange between a pointer register and memory,
        // selected only when the node's barrier_data() is 0. The node's result
        // is set into newval. No lock() is emitted and no flags operand is
        // declared -- presumably because xchg with a memory operand is
        // implicitly locked and leaves flags alone (confirm in the ISA manual).
 8444   match(Set newval (GetAndSetP mem newval));
 8445   predicate(n->as_LoadStore()->barrier_data() == 0);
 8446   format %{ "XCHGQ  $newval,[$mem]" %}
 8447   ins_encode %{
 8448     __ xchgq($newval$$Register, $mem$$Address);
 8449   %}
 8450   ins_pipe( pipe_cmpxchg );
 8451 %}
 8452 
 8453 instruct xchgN( memory mem, rRegN newval) %{
        // GetAndSetN: 32-bit exchange between a narrow-pointer register
        // (rRegN) and memory; the node's result is set into newval.
        // The format brackets the memory operand like the sibling xchg rules.
        // No lock() is emitted and no flags operand is declared -- presumably
        // because xchg with a memory operand is implicitly locked and leaves
        // flags alone (confirm in the ISA manual).
 8454   match(Set newval (GetAndSetN mem newval));
 8455   format %{ "XCHGL  $newval,[$mem]" %}
 8456   ins_encode %{
 8457     __ xchgl($newval$$Register, $mem$$Address);
 8458   %}
 8459   ins_pipe( pipe_cmpxchg );
 8460 %}
 8461 
 8462 //----------Abs Instructions-------------------------------------------
 8463 
 8464 // Integer Absolute Instructions
 8465 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8466 %{
        // Int absolute value without a branch. dst is declared TEMP and is
        // zeroed before src is read; flags are declared killed.
 8467   match(Set dst (AbsI src));
 8468   effect(TEMP dst, KILL cr);
 8469   format %{ "xorl    $dst, $dst\t# abs int\n\t"
 8470             "subl    $dst, $src\n\t"
 8471             "cmovll  $dst, $src" %}
 8472   ins_encode %{
          // dst = 0 - src; if that comparison was "less" (0 < src), the
          // negation is discarded and dst takes src itself.
 8473     __ xorl($dst$$Register, $dst$$Register);
 8474     __ subl($dst$$Register, $src$$Register);
 8475     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
 8476   %}
 8477 
 8478   ins_pipe(ialu_reg_reg);
 8479 %}
 8480 
 8481 // Long Absolute Instructions
 8482 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8483 %{
        // Long absolute value without a branch. dst is declared TEMP and is
        // zeroed before src is read; flags are declared killed.
 8484   match(Set dst (AbsL src));
 8485   effect(TEMP dst, KILL cr);
 8486   format %{ "xorl    $dst, $dst\t# abs long\n\t"
 8487             "subq    $dst, $src\n\t"
 8488             "cmovlq  $dst, $src" %}
 8489   ins_encode %{
          // dst = 0 - src; if that comparison was "less" (0 < src), the
          // negation is discarded and dst takes src itself.
          // NOTE(review): the zeroing uses the 32-bit xorl while sub/cmov are
          // 64-bit; presumably this relies on 32-bit register writes clearing
          // the upper half on x86-64 -- confirm against the ISA manual.
 8490     __ xorl($dst$$Register, $dst$$Register);
 8491     __ subq($dst$$Register, $src$$Register);
 8492     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
 8493   %}
 8494 
 8495   ins_pipe(ialu_reg_reg);
 8496 %}
 8497 
 8498 //----------Subtraction Instructions-------------------------------------------
 8499 
 8500 // Integer Subtraction Instructions
 8501 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8502 %{
        // Int subtract, register form: dst = dst - src (two-address, dst is
        // both input and result). The flags register is declared killed.
 8503   match(Set dst (SubI dst src));
 8504   effect(KILL cr);
 8505 
 8506   format %{ "subl    $dst, $src\t# int" %}
 8507   ins_encode %{
 8508     __ subl($dst$$Register, $src$$Register);
 8509   %}
 8510   ins_pipe(ialu_reg_reg);
 8511 %}
 8512 
 8513 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 8514 %{
        // Int subtract with the subtrahend loaded from memory: the LoadI is
        // folded into the subl as a memory source operand.
 8515   match(Set dst (SubI dst (LoadI src)));
 8516   effect(KILL cr);
 8517 
 8518   ins_cost(150);
 8519   format %{ "subl    $dst, $src\t# int" %}
 8520   ins_encode %{
 8521     __ subl($dst$$Register, $src$$Address);
 8522   %}
 8523   ins_pipe(ialu_reg_mem);
 8524 %}
 8525 
 8526 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 8527 %{
        // Read-modify-write int subtract: load from dst, subtract src, store
        // back to the same dst, folded into one subl with a memory destination.
 8528   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 8529   effect(KILL cr);
 8530 
 8531   ins_cost(150);
 8532   format %{ "subl    $dst, $src\t# int" %}
 8533   ins_encode %{
 8534     __ subl($dst$$Address, $src$$Register);
 8535   %}
 8536   ins_pipe(ialu_mem_reg);
 8537 %}
 8538 
 8539 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8540 %{
        // Long subtract, register form: dst = dst - src (two-address, dst is
        // both input and result). The flags register is declared killed.
 8541   match(Set dst (SubL dst src));
 8542   effect(KILL cr);
 8543 
 8544   format %{ "subq    $dst, $src\t# long" %}
 8545   ins_encode %{
 8546     __ subq($dst$$Register, $src$$Register);
 8547   %}
 8548   ins_pipe(ialu_reg_reg);
 8549 %}
 8550 
 8551 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 8552 %{
        // Long subtract with the subtrahend loaded from memory: the LoadL is
        // folded into the subq as a memory source operand.
 8553   match(Set dst (SubL dst (LoadL src)));
 8554   effect(KILL cr);
 8555 
 8556   ins_cost(150);
 8557   format %{ "subq    $dst, $src\t# long" %}
 8558   ins_encode %{
 8559     __ subq($dst$$Register, $src$$Address);
 8560   %}
 8561   ins_pipe(ialu_reg_mem);
 8562 %}
 8563 
 8564 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 8565 %{
        // Read-modify-write long subtract: load from dst, subtract src, store
        // back to the same dst, folded into one subq with a memory destination.
 8566   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
 8567   effect(KILL cr);
 8568 
 8569   ins_cost(150);
 8570   format %{ "subq    $dst, $src\t# long" %}
 8571   ins_encode %{
 8572     __ subq($dst$$Address, $src$$Register);
 8573   %}
 8574   ins_pipe(ialu_mem_reg);
 8575 %}
 8576 
 8577 // Subtract from a pointer
 8578 // XXX hmpf???
 8579 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
 8580 %{
        // Matches a pointer plus a negated int, AddP dst (0 - src), and emits
        // it as a subtract. Unlike the neighbouring rules, this one is encoded
        // through encoding classes (REX_reg_reg_wide, OpcP, reg_reg) with
        // opcode 0x2B rather than an inline assembler block.
        // NOTE(review): src is an int register (rRegI) while the format shows
        // a 64-bit subq -- confirm the upper half of src is well defined here.
 8581   match(Set dst (AddP dst (SubI zero src)));
 8582   effect(KILL cr);
 8583 
 8584   format %{ "subq    $dst, $src\t# ptr - int" %}
 8585   opcode(0x2B);
 8586   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
 8587   ins_pipe(ialu_reg_reg);
 8588 %}
 8589 
 8590 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
 8591 %{
        // Int negate written as a subtraction from zero: matches SubI whose
        // left operand is the immI_0 constant and emits negl on $dst in place.
        // The flags register is declared as killed.
 8592   match(Set dst (SubI zero dst));
 8593   effect(KILL cr);
 8594 
 8595   format %{ "negl    $dst\t# int" %}
 8596   ins_encode %{
 8597     __ negl($dst$$Register);
 8598   %}
 8599   ins_pipe(ialu_reg);
 8600 %}
 8601 
 8602 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
 8603 %{
        // Int negate for the dedicated NegI ideal node; emits the same in-place
        // negl as the SubI-from-zero rule negI_rReg.
        // The flags register is declared as killed.
 8604   match(Set dst (NegI dst));
 8605   effect(KILL cr);
 8606 
 8607   format %{ "negl    $dst\t# int" %}
 8608   ins_encode %{
 8609     __ negl($dst$$Register);
 8610   %}
 8611   ins_pipe(ialu_reg);
 8612 %}
 8613 
 8614 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
 8615 %{
        // In-place int negate of a memory operand: matches storing
        // 0 - (LoadI $dst) back to $dst and emits negl on the address.
        // The flags register is declared as killed.
        // NOTE(review): the pipeline class is ialu_reg although the operand is
        // memory -- confirm this is intended.
 8616   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
 8617   effect(KILL cr);
 8618 
 8619   format %{ "negl    $dst\t# int" %}
 8620   ins_encode %{
 8621     __ negl($dst$$Address);
 8622   %}
 8623   ins_pipe(ialu_reg);
 8624 %}
 8625 
 8626 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
 8627 %{
        // Long negate written as a subtraction from zero: matches SubL whose
        // left operand is the immL0 constant and emits negq on $dst in place.
        // The flags register is declared as killed.
 8628   match(Set dst (SubL zero dst));
 8629   effect(KILL cr);
 8630 
 8631   format %{ "negq    $dst\t# long" %}
 8632   ins_encode %{
 8633     __ negq($dst$$Register);
 8634   %}
 8635   ins_pipe(ialu_reg);
 8636 %}
 8637 
 8638 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
 8639 %{
        // Long negate for the dedicated NegL ideal node; emits the same in-place
        // negq as the SubL-from-zero rule negL_rReg.
        // The flags register is declared as killed.
 8640   match(Set dst (NegL dst));
 8641   effect(KILL cr);
 8642 
        // The operand is a long (rRegL / negq), so the disassembly tag is
        // "# long", matching negL_rReg and negL_mem.
 8643   format %{ "negq    $dst\t# long" %}
 8644   ins_encode %{
 8645     __ negq($dst$$Register);
 8646   %}
 8647   ins_pipe(ialu_reg);
 8648 %}
 8649 
 8650 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
 8651 %{
        // In-place long negate of a memory operand: matches storing
        // 0 - (LoadL $dst) back to $dst and emits negq on the address.
        // The flags register is declared as killed.
        // NOTE(review): the pipeline class is ialu_reg although the operand is
        // memory -- confirm this is intended.
 8652   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
 8653   effect(KILL cr);
 8654 
 8655   format %{ "negq    $dst\t# long" %}
 8656   ins_encode %{
 8657     __ negq($dst$$Address);
 8658   %}
 8659   ins_pipe(ialu_reg);
 8660 %}
 8661 
 8662 //----------Multiplication/Division Instructions-------------------------------
 8663 // Integer Multiplication Instructions
 8664 // Multiply Register
 8665 
 8666 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8667 %{
        // Int multiply, register-register: $dst = $dst * $src via the
        // two-operand imull. The flags register is declared as killed.
 8668   match(Set dst (MulI dst src));
 8669   effect(KILL cr);
 8670 
 8671   ins_cost(300);
 8672   format %{ "imull   $dst, $src\t# int" %}
 8673   ins_encode %{
 8674     __ imull($dst$$Register, $src$$Register);
 8675   %}
 8676   ins_pipe(ialu_reg_reg_alu0);
 8677 %}
 8678 
 8679 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
 8680 %{
        // Int multiply of a register by an int constant into a separate
        // destination: $dst = $src * $imm via the three-operand imull.
        // The flags register is declared as killed.
 8681   match(Set dst (MulI src imm));
 8682   effect(KILL cr);
 8683 
 8684   ins_cost(300);
 8685   format %{ "imull   $dst, $src, $imm\t# int" %}
 8686   ins_encode %{
 8687     __ imull($dst$$Register, $src$$Register, $imm$$constant);
 8688   %}
 8689   ins_pipe(ialu_reg_reg_alu0);
 8690 %}
 8691 
 8692 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
 8693 %{
        // Int multiply whose right operand is a LoadI: the load is folded into
        // imull's memory operand, so $dst = $dst * [$src].
        // The flags register is declared as killed.
 8694   match(Set dst (MulI dst (LoadI src)));
 8695   effect(KILL cr);
 8696 
 8697   ins_cost(350);
 8698   format %{ "imull   $dst, $src\t# int" %}
 8699   ins_encode %{
 8700     __ imull($dst$$Register, $src$$Address);
 8701   %}
 8702   ins_pipe(ialu_reg_mem_alu0);
 8703 %}
 8704 
 8705 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
 8706 %{
        // Int multiply of a loaded value by an int constant:
        // $dst = [$src] * $imm via the three-operand imull with an address
        // source. The flags register is declared as killed.
        // NOTE(review): cost is 300 here versus 350 for mulI_mem -- confirm the
        // memory form is meant to cost the same as the register form.
 8707   match(Set dst (MulI (LoadI src) imm));
 8708   effect(KILL cr);
 8709 
 8710   ins_cost(300);
 8711   format %{ "imull   $dst, $src, $imm\t# int" %}
 8712   ins_encode %{
 8713     __ imull($dst$$Register, $src$$Address, $imm$$constant);
 8714   %}
 8715   ins_pipe(ialu_reg_mem_alu0);
 8716 %}
 8717 
 8718 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
 8719 %{
        // MulAddS2I over the pairs ($dst, $src1) and ($src2, $src3). It has no
        // encoding of its own: it expands into two mulI_rReg and one addI_rReg.
        // $src2 is declared KILL because the second multiply uses it as its
        // destination.
 8720   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 8721   effect(KILL cr, KILL src2);
 8722 
        // dst *= src1; src2 *= src3; dst += src2
 8723   expand %{ mulI_rReg(dst, src1, cr);
 8724            mulI_rReg(src2, src3, cr);
 8725            addI_rReg(dst, src2, cr); %}
 8726 %}
 8727 
 8728 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8729 %{
        // Long multiply, register-register: $dst = $dst * $src via the
        // two-operand imulq. The flags register is declared as killed.
 8730   match(Set dst (MulL dst src));
 8731   effect(KILL cr);
 8732 
 8733   ins_cost(300);
 8734   format %{ "imulq   $dst, $src\t# long" %}
 8735   ins_encode %{
 8736     __ imulq($dst$$Register, $src$$Register);
 8737   %}
 8738   ins_pipe(ialu_reg_reg_alu0);
 8739 %}
 8740 
 8741 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
 8742 %{
        // Long multiply of a register by a constant matching the immL32
        // operand: $dst = $src * $imm via the three-operand imulq.
        // The flags register is declared as killed.
 8743   match(Set dst (MulL src imm));
 8744   effect(KILL cr);
 8745 
 8746   ins_cost(300);
 8747   format %{ "imulq   $dst, $src, $imm\t# long" %}
 8748   ins_encode %{
 8749     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
 8750   %}
 8751   ins_pipe(ialu_reg_reg_alu0);
 8752 %}
 8753 
 8754 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
 8755 %{
        // Long multiply whose right operand is a LoadL: the load is folded into
        // imulq's memory operand, so $dst = $dst * [$src].
        // The flags register is declared as killed.
 8756   match(Set dst (MulL dst (LoadL src)));
 8757   effect(KILL cr);
 8758 
 8759   ins_cost(350);
 8760   format %{ "imulq   $dst, $src\t# long" %}
 8761   ins_encode %{
 8762     __ imulq($dst$$Register, $src$$Address);
 8763   %}
 8764   ins_pipe(ialu_reg_mem_alu0);
 8765 %}
 8766 
 8767 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
 8768 %{
        // Long multiply of a loaded value by a constant matching immL32:
        // $dst = [$src] * $imm via the three-operand imulq with an address
        // source. The flags register is declared as killed.
        // NOTE(review): cost is 300 here versus 350 for mulL_mem -- confirm the
        // memory form is meant to cost the same as the register form.
 8769   match(Set dst (MulL (LoadL src) imm));
 8770   effect(KILL cr);
 8771 
 8772   ins_cost(300);
 8773   format %{ "imulq   $dst, $src, $imm\t# long" %}
 8774   ins_encode %{
 8775     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
 8776   %}
 8777   ins_pipe(ialu_reg_mem_alu0);
 8778 %}
 8779 
 8780 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 8781 %{
        // MulHiL: one input is pinned to the rax operand class and the result
        // to the rdx operand class; the encoding is the one-operand imulq $src
        // (the format string shows the product as RDX:RAX).
        // rax is both read and clobbered (USE_KILL); flags are killed.
 8782   match(Set dst (MulHiL src rax));
 8783   effect(USE_KILL rax, KILL cr);
 8784 
 8785   ins_cost(300);
 8786   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
 8787   ins_encode %{
 8788     __ imulq($src$$Register);
 8789   %}
 8790   ins_pipe(ialu_reg_reg_alu0);
 8791 %}
 8792 
 8793 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 8794 %{
        // UMulHiL: same register constraints as mulHiL_rReg (input in the rax
        // class, result in the rdx class) but encoded with the one-operand
        // mulq $src. rax is both read and clobbered (USE_KILL); flags are killed.
 8795   match(Set dst (UMulHiL src rax));
 8796   effect(USE_KILL rax, KILL cr);
 8797 
 8798   ins_cost(300);
 8799   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
 8800   ins_encode %{
 8801     __ mulq($src$$Register);
 8802   %}
 8803   ins_pipe(ialu_reg_reg_alu0);
 8804 %}
 8805 
 8806 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
 8807                    rFlagsReg cr)
 8808 %{
        // Signed int divide: dividend and quotient both live in the rax operand
        // class, the divisor is constrained to a register outside rax/rdx, and
        // rdx and the flags are declared killed.
        // The code comes from the cdql_enc encoding class (defined elsewhere);
        // per the format string it skips idivl when rax == 0x80000000 and
        // $div == -1.
 8809   match(Set rax (DivI rax div));
 8810   effect(KILL rdx, KILL cr);
 8811 
 8812   ins_cost(30*100+10*100); // XXX
 8813   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
 8814             "jne,s   normal\n\t"
 8815             "xorl    rdx, rdx\n\t"
 8816             "cmpl    $div, -1\n\t"
 8817             "je,s    done\n"
 8818     "normal: cdql\n\t"
 8819             "idivl   $div\n"
 8820     "done:"        %}
 8821   ins_encode(cdql_enc(div));
 8822   ins_pipe(ialu_reg_reg_alu0);
 8823 %}
 8824 
 8825 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
 8826                    rFlagsReg cr)
 8827 %{
        // Signed long divide: dividend and quotient both live in the rax operand
        // class, the divisor is constrained to a register outside rax/rdx, and
        // rdx and the flags are declared killed.
        // The code comes from the cdqq_enc encoding class (defined elsewhere);
        // per the format string it skips idivq when rax == 0x8000000000000000
        // and $div == -1.
 8828   match(Set rax (DivL rax div));
 8829   effect(KILL rdx, KILL cr);
 8830 
 8831   ins_cost(30*100+10*100); // XXX
 8832   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
 8833             "cmpq    rax, rdx\n\t"
 8834             "jne,s   normal\n\t"
 8835             "xorl    rdx, rdx\n\t"
 8836             "cmpq    $div, -1\n\t"
 8837             "je,s    done\n"
 8838     "normal: cdqq\n\t"
 8839             "idivq   $div\n"
 8840     "done:"        %}
 8841   ins_encode(cdqq_enc(div));
 8842   ins_pipe(ialu_reg_reg_alu0);
 8843 %}
 8844 
 8845 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
 8846 %{
        // Unsigned int divide: dividend and quotient in the rax operand class,
        // divisor outside rax/rdx; rdx and the flags are declared killed.
        // The code is produced by the macro-assembler helper udivI, which is
        // given rax, the divisor and rdx.
 8847   match(Set rax (UDivI rax div));
 8848   effect(KILL rdx, KILL cr);
 8849 
 8850   ins_cost(300);
 8851   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
 8852   ins_encode %{
 8853     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
 8854   %}
 8855   ins_pipe(ialu_reg_reg_alu0);
 8856 %}
 8857 
 8858 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
 8859 %{
        // Unsigned long divide: dividend and quotient in the rax operand class,
        // divisor outside rax/rdx; rdx and the flags are declared killed.
        // The code is produced by the macro-assembler helper udivL, which is
        // given rax, the divisor and rdx.
 8860   match(Set rax (UDivL rax div));
 8861   effect(KILL rdx, KILL cr);
 8862 
 8863   ins_cost(300);
 8864   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
 8865   ins_encode %{
 8866      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
 8867   %}
 8868   ins_pipe(ialu_reg_reg_alu0);
 8869 %}
 8870 
 8871 // Integer DIVMOD with Register, both quotient and mod results
      // Matches a DivModI node directly (no Set), with operands in the rax and
      // rdx classes. Unlike divI_rReg, rdx is not declared KILL here; only the
      // flags are. Uses the same cdql_enc encoding and format as divI_rReg.
 8872 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
 8873                              rFlagsReg cr)
 8874 %{
 8875   match(DivModI rax div);
 8876   effect(KILL cr);
 8877 
 8878   ins_cost(30*100+10*100); // XXX
 8879   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
 8880             "jne,s   normal\n\t"
 8881             "xorl    rdx, rdx\n\t"
 8882             "cmpl    $div, -1\n\t"
 8883             "je,s    done\n"
 8884     "normal: cdql\n\t"
 8885             "idivl   $div\n"
 8886     "done:"        %}
 8887   ins_encode(cdql_enc(div));
 8888   ins_pipe(pipe_slow);
 8889 %}
 8890 
 8891 // Long DIVMOD with Register, both quotient and mod results
      // Matches a DivModL node directly (no Set), with operands in the rax and
      // rdx classes. Unlike divL_rReg, rdx is not declared KILL here; only the
      // flags are. Uses the same cdqq_enc encoding and format as divL_rReg.
 8892 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
 8893                              rFlagsReg cr)
 8894 %{
 8895   match(DivModL rax div);
 8896   effect(KILL cr);
 8897 
 8898   ins_cost(30*100+10*100); // XXX
 8899   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
 8900             "cmpq    rax, rdx\n\t"
 8901             "jne,s   normal\n\t"
 8902             "xorl    rdx, rdx\n\t"
 8903             "cmpq    $div, -1\n\t"
 8904             "je,s    done\n"
 8905     "normal: cdqq\n\t"
 8906             "idivq   $div\n"
 8907     "done:"        %}
 8908   ins_encode(cdqq_enc(div));
 8909   ins_pipe(pipe_slow);
 8910 %}
 8911 
 8912 // Unsigned integer DIVMOD with Register, both quotient and mod results
      // Matches a UDivModI node directly (no Set). Needs one extra scratch
      // register outside rax/rdx (TEMP tmp); the flags are declared killed.
      // The code is produced by the macro-assembler helper udivmodI, which is
      // given rax, the divisor, rdx and the temp.
 8913 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
 8914                               no_rax_rdx_RegI div, rFlagsReg cr)
 8915 %{
 8916   match(UDivModI rax div);
 8917   effect(TEMP tmp, KILL cr);
 8918 
 8919   ins_cost(300);
 8920   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
 8921             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
 8922           %}
 8923   ins_encode %{
 8924     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
 8925   %}
 8926   ins_pipe(pipe_slow);
 8927 %}
 8928 
 8929 // Unsigned long DIVMOD with Register, both quotient and mod results
      // Matches a UDivModL node directly (no Set). Needs one extra scratch
      // register outside rax/rdx (TEMP tmp); the flags are declared killed.
      // The code is produced by the macro-assembler helper udivmodL, which is
      // given rax, the divisor, rdx and the temp.
 8930 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
 8931                               no_rax_rdx_RegL div, rFlagsReg cr)
 8932 %{
 8933   match(UDivModL rax div);
 8934   effect(TEMP tmp, KILL cr);
 8935 
 8936   ins_cost(300);
 8937   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
 8938             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
 8939           %}
 8940   ins_encode %{
 8941     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
 8942   %}
 8943   ins_pipe(pipe_slow);
 8944 %}
 8945 
 8946 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
 8947                    rFlagsReg cr)
 8948 %{
        // Signed int remainder: dividend in the rax operand class, result in
        // the rdx operand class; rax and the flags are declared killed.
        // Uses the same cdql_enc encoding as divI_rReg; per the format string it
        // skips idivl when rax == 0x80000000 and $div == -1.
 8949   match(Set rdx (ModI rax div));
 8950   effect(KILL rax, KILL cr);
 8951 
 8952   ins_cost(300); // XXX
 8953   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
 8954             "jne,s   normal\n\t"
 8955             "xorl    rdx, rdx\n\t"
 8956             "cmpl    $div, -1\n\t"
 8957             "je,s    done\n"
 8958     "normal: cdql\n\t"
 8959             "idivl   $div\n"
 8960     "done:"        %}
 8961   ins_encode(cdql_enc(div));
 8962   ins_pipe(ialu_reg_reg_alu0);
 8963 %}
 8964 
 8965 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
 8966                    rFlagsReg cr)
 8967 %{
        // Signed long remainder: dividend in the rax operand class, result in
        // the rdx operand class; rax and the flags are declared killed.
        // Uses the same cdqq_enc encoding as divL_rReg; per the format string it
        // skips idivq when rax == 0x8000000000000000 and $div == -1.
 8968   match(Set rdx (ModL rax div));
 8969   effect(KILL rax, KILL cr);
 8970 
 8971   ins_cost(300); // XXX
 8972   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
 8973             "cmpq    rax, rdx\n\t"
 8974             "jne,s   normal\n\t"
 8975             "xorl    rdx, rdx\n\t"
 8976             "cmpq    $div, -1\n\t"
 8977             "je,s    done\n"
 8978     "normal: cdqq\n\t"
 8979             "idivq   $div\n"
 8980     "done:"        %}
 8981   ins_encode(cdqq_enc(div));
 8982   ins_pipe(ialu_reg_reg_alu0);
 8983 %}
 8984 
 8985 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
 8986 %{
        // Unsigned int remainder: dividend in the rax operand class, result in
        // the rdx operand class; rax and the flags are declared killed.
        // The code is produced by the macro-assembler helper umodI, which is
        // given rax, the divisor and rdx.
 8987   match(Set rdx (UModI rax div));
 8988   effect(KILL rax, KILL cr);
 8989 
 8990   ins_cost(300);
 8991   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
 8992   ins_encode %{
 8993     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
 8994   %}
 8995   ins_pipe(ialu_reg_reg_alu0);
 8996 %}
 8997 
 8998 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
 8999 %{
        // Unsigned long remainder: dividend in the rax operand class, result in
        // the rdx operand class; rax and the flags are declared killed.
        // The code is produced by the macro-assembler helper umodL, which is
        // given rax, the divisor and rdx.
 9000   match(Set rdx (UModL rax div));
 9001   effect(KILL rax, KILL cr);
 9002 
 9003   ins_cost(300);
 9004   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
 9005   ins_encode %{
 9006     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
 9007   %}
 9008   ins_pipe(ialu_reg_reg_alu0);
 9009 %}
 9010 
 9011 // Integer Shift Instructions
 9012 // Shift Left by one, two, three
      // Int left shift of a register in place by a constant that matches the
      // immI2 operand (defined elsewhere in this file); emitted as
      // sall $dst, imm. The flags register is declared as killed.
 9013 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
 9014 %{
 9015   match(Set dst (LShiftI dst shift));
 9016   effect(KILL cr);
 9017 
 9018   format %{ "sall    $dst, $shift" %}
 9019   ins_encode %{
 9020     __ sall($dst$$Register, $shift$$constant);
 9021   %}
 9022   ins_pipe(ialu_reg);
 9023 %}
 9024 
 9025 // Shift Left by 8-bit immediate
      // Int left shift of a register in place by an immI8 constant; emitted as
      // sall $dst, imm. The flags register is declared as killed.
 9026 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9027 %{
 9028   match(Set dst (LShiftI dst shift));
 9029   effect(KILL cr);
 9030 
 9031   format %{ "sall    $dst, $shift" %}
 9032   ins_encode %{
 9033     __ sall($dst$$Register, $shift$$constant);
 9034   %}
 9035   ins_pipe(ialu_reg);
 9036 %}
 9037 
 9038 // Shift Left by 8-bit immediate
      // Memory form: matches storing (LoadI $dst) << $shift back to the same
      // memory operand and emits sall directly on the address.
      // The flags register is declared as killed.
 9039 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9040 %{
 9041   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 9042   effect(KILL cr);
 9043 
 9044   format %{ "sall    $dst, $shift" %}
 9045   ins_encode %{
 9046     __ sall($dst$$Address, $shift$$constant);
 9047   %}
 9048   ins_pipe(ialu_mem_imm);
 9049 %}
 9050 
 9051 // Shift Left by variable
      // Selected only when BMI2 is not available. The count is constrained to
      // the rcx operand class and the encoder is given only $dst, so the count
      // register is implicit in the emitted sall. Flags are declared killed.
 9052 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9053 %{
 9054   predicate(!VM_Version::supports_bmi2());
 9055   match(Set dst (LShiftI dst shift));
 9056   effect(KILL cr);
 9057 
 9058   format %{ "sall    $dst, $shift" %}
 9059   ins_encode %{
 9060     __ sall($dst$$Register);
 9061   %}
 9062   ins_pipe(ialu_reg_reg);
 9063 %}
 9064 
 9065 // Shift Left by variable
      // Memory read-modify-write form, selected only when BMI2 is not
      // available: matches storing (LoadI $dst) << $shift back to $dst. The
      // count is constrained to the rcx operand class and is implicit in the
      // emitted sall on the address. Flags are declared killed.
 9066 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9067 %{
 9068   predicate(!VM_Version::supports_bmi2());
 9069   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 9070   effect(KILL cr);
 9071 
 9072   format %{ "sall    $dst, $shift" %}
 9073   ins_encode %{
 9074     __ sall($dst$$Address);
 9075   %}
 9076   ins_pipe(ialu_mem_reg);
 9077 %}
 9078 
 9079 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9080 %{
        // BMI2 variable int left shift: three-operand shlxl with the count in
        // any int register and a destination separate from the source.
        // No flags operand or KILL effect is declared for this form.
 9081   predicate(VM_Version::supports_bmi2());
 9082   match(Set dst (LShiftI src shift));
 9083 
 9084   format %{ "shlxl   $dst, $src, $shift" %}
 9085   ins_encode %{
 9086     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
 9087   %}
 9088   ins_pipe(ialu_reg_reg);
 9089 %}
 9090 
 9091 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9092 %{
        // BMI2 variable int left shift with the shifted value loaded from
        // memory: the LoadI is folded into shlxl's address operand.
        // No flags operand or KILL effect is declared for this form.
 9093   predicate(VM_Version::supports_bmi2());
 9094   match(Set dst (LShiftI (LoadI src) shift));
 9095   ins_cost(175);
 9096   format %{ "shlxl   $dst, $src, $shift" %}
 9097   ins_encode %{
 9098     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
 9099   %}
 9100   ins_pipe(ialu_reg_mem);
 9101 %}
 9102 
 9103 // Arithmetic Shift Right by 8-bit immediate
      // Int arithmetic right shift of a register in place by an immI8
      // constant; emitted as sarl $dst, imm. Flags are declared killed.
 9104 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9105 %{
 9106   match(Set dst (RShiftI dst shift));
 9107   effect(KILL cr);
 9108 
 9109   format %{ "sarl    $dst, $shift" %}
 9110   ins_encode %{
 9111     __ sarl($dst$$Register, $shift$$constant);
 9112   %}
        // Register destination: use the register pipeline class, as the sibling
        // rules salI_rReg_imm and shrI_rReg_imm do (was ialu_mem_imm).
 9113   ins_pipe(ialu_reg);
 9114 %}
 9115 
 9116 // Arithmetic Shift Right by 8-bit immediate
      // Memory form: matches storing (LoadI $dst) >> $shift (RShiftI) back to
      // the same memory operand and emits sarl directly on the address.
      // The flags register is declared as killed.
 9117 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9118 %{
 9119   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 9120   effect(KILL cr);
 9121 
 9122   format %{ "sarl    $dst, $shift" %}
 9123   ins_encode %{
 9124     __ sarl($dst$$Address, $shift$$constant);
 9125   %}
 9126   ins_pipe(ialu_mem_imm);
 9127 %}
 9128 
 9129 // Arithmetic Shift Right by variable
      // Selected only when BMI2 is not available. The count is constrained to
      // the rcx operand class and the encoder is given only $dst, so the count
      // register is implicit in the emitted sarl. Flags are declared killed.
 9130 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9131 %{
 9132   predicate(!VM_Version::supports_bmi2());
 9133   match(Set dst (RShiftI dst shift));
 9134   effect(KILL cr);
 9135   format %{ "sarl    $dst, $shift" %}
 9136   ins_encode %{
 9137     __ sarl($dst$$Register);
 9138   %}
 9139   ins_pipe(ialu_reg_reg);
 9140 %}
 9141 
 9142 // Arithmetic Shift Right by variable
      // Memory read-modify-write form, selected only when BMI2 is not
      // available: matches storing (LoadI $dst) >> $shift (RShiftI) back to
      // $dst. The count is constrained to the rcx operand class and is implicit
      // in the emitted sarl on the address. Flags are declared killed.
 9143 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9144 %{
 9145   predicate(!VM_Version::supports_bmi2());
 9146   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 9147   effect(KILL cr);
 9148 
 9149   format %{ "sarl    $dst, $shift" %}
 9150   ins_encode %{
 9151     __ sarl($dst$$Address);
 9152   %}
 9153   ins_pipe(ialu_mem_reg);
 9154 %}
 9155 
 9156 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9157 %{
        // BMI2 variable int arithmetic right shift: three-operand sarxl with the
        // count in any int register and a destination separate from the source.
        // No flags operand or KILL effect is declared for this form.
 9158   predicate(VM_Version::supports_bmi2());
 9159   match(Set dst (RShiftI src shift));
 9160 
 9161   format %{ "sarxl   $dst, $src, $shift" %}
 9162   ins_encode %{
 9163     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
 9164   %}
 9165   ins_pipe(ialu_reg_reg);
 9166 %}
 9167 
 9168 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9169 %{
        // BMI2 variable int arithmetic right shift with the shifted value
        // loaded from memory: the LoadI is folded into sarxl's address operand.
        // No flags operand or KILL effect is declared for this form.
 9170   predicate(VM_Version::supports_bmi2());
 9171   match(Set dst (RShiftI (LoadI src) shift));
 9172   ins_cost(175);
 9173   format %{ "sarxl   $dst, $src, $shift" %}
 9174   ins_encode %{
 9175     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
 9176   %}
 9177   ins_pipe(ialu_reg_mem);
 9178 %}
 9179 
 9180 // Logical Shift Right by 8-bit immediate
      // Int logical (unsigned) right shift of a register in place by an immI8
      // constant; emitted as shrl $dst, imm. Flags are declared killed.
 9181 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9182 %{
 9183   match(Set dst (URShiftI dst shift));
 9184   effect(KILL cr);
 9185 
 9186   format %{ "shrl    $dst, $shift" %}
 9187   ins_encode %{
 9188     __ shrl($dst$$Register, $shift$$constant);
 9189   %}
 9190   ins_pipe(ialu_reg);
 9191 %}
 9192 
 9193 // Logical Shift Right by 8-bit immediate
      // Memory form: matches storing (LoadI $dst) >>> $shift (URShiftI) back to
      // the same memory operand and emits shrl directly on the address.
      // The flags register is declared as killed.
 9194 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9195 %{
 9196   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 9197   effect(KILL cr);
 9198 
 9199   format %{ "shrl    $dst, $shift" %}
 9200   ins_encode %{
 9201     __ shrl($dst$$Address, $shift$$constant);
 9202   %}
 9203   ins_pipe(ialu_mem_imm);
 9204 %}
 9205 
 9206 // Logical Shift Right by variable
      // Selected only when BMI2 is not available. The count is constrained to
      // the rcx operand class and the encoder is given only $dst, so the count
      // register is implicit in the emitted shrl. Flags are declared killed.
 9207 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9208 %{
 9209   predicate(!VM_Version::supports_bmi2());
 9210   match(Set dst (URShiftI dst shift));
 9211   effect(KILL cr);
 9212 
 9213   format %{ "shrl    $dst, $shift" %}
 9214   ins_encode %{
 9215     __ shrl($dst$$Register);
 9216   %}
 9217   ins_pipe(ialu_reg_reg);
 9218 %}
 9219 
 9220 // Logical Shift Right by variable
      // Memory read-modify-write form, selected only when BMI2 is not
      // available: matches storing (LoadI $dst) >>> $shift (URShiftI) back to
      // $dst. The count is constrained to the rcx operand class and is implicit
      // in the emitted shrl on the address. Flags are declared killed.
 9221 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9222 %{
 9223   predicate(!VM_Version::supports_bmi2());
 9224   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 9225   effect(KILL cr);
 9226 
 9227   format %{ "shrl    $dst, $shift" %}
 9228   ins_encode %{
 9229     __ shrl($dst$$Address);
 9230   %}
 9231   ins_pipe(ialu_mem_reg);
 9232 %}
 9233 
 9234 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9235 %{
        // BMI2 variable int logical right shift: three-operand shrxl with the
        // count in any int register and a destination separate from the source.
        // No flags operand or KILL effect is declared for this form.
 9236   predicate(VM_Version::supports_bmi2());
 9237   match(Set dst (URShiftI src shift));
 9238 
 9239   format %{ "shrxl   $dst, $src, $shift" %}
 9240   ins_encode %{
 9241     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
 9242   %}
 9243   ins_pipe(ialu_reg_reg);
 9244 %}
 9245 
 9246 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9247 %{
        // BMI2 variable int logical right shift with the shifted value loaded
        // from memory: the LoadI is folded into shrxl's address operand.
        // No flags operand or KILL effect is declared for this form.
 9248   predicate(VM_Version::supports_bmi2());
 9249   match(Set dst (URShiftI (LoadI src) shift));
 9250   ins_cost(175);
 9251   format %{ "shrxl   $dst, $src, $shift" %}
 9252   ins_encode %{
 9253     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
 9254   %}
 9255   ins_pipe(ialu_reg_mem);
 9256 %}
 9257 
 9258 // Long Shift Instructions
 9259 // Shift Left by one, two, three
      // Long left shift of a register in place by a constant that matches the
      // immI2 operand (defined elsewhere in this file); emitted as
      // salq $dst, imm. The flags register is declared as killed.
 9260 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
 9261 %{
 9262   match(Set dst (LShiftL dst shift));
 9263   effect(KILL cr);
 9264 
 9265   format %{ "salq    $dst, $shift" %}
 9266   ins_encode %{
 9267     __ salq($dst$$Register, $shift$$constant);
 9268   %}
 9269   ins_pipe(ialu_reg);
 9270 %}
 9271 
 9272 // Shift Left by 8-bit immediate
      // Long left shift of a register in place by an immI8 constant; emitted
      // as salq $dst, imm. The flags register is declared as killed.
 9273 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 9274 %{
 9275   match(Set dst (LShiftL dst shift));
 9276   effect(KILL cr);
 9277 
 9278   format %{ "salq    $dst, $shift" %}
 9279   ins_encode %{
 9280     __ salq($dst$$Register, $shift$$constant);
 9281   %}
 9282   ins_pipe(ialu_reg);
 9283 %}
 9284 
 9285 // Shift Left by 8-bit immediate
      // Memory form: matches storing (LoadL $dst) << $shift back to the same
      // memory operand and emits salq directly on the address.
      // The flags register is declared as killed.
 9286 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9287 %{
 9288   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 9289   effect(KILL cr);
 9290 
 9291   format %{ "salq    $dst, $shift" %}
 9292   ins_encode %{
 9293     __ salq($dst$$Address, $shift$$constant);
 9294   %}
 9295   ins_pipe(ialu_mem_imm);
 9296 %}
 9297 
 9298 // Shift Left by variable
      // Selected only when BMI2 is not available. The (int) count is
      // constrained to the rcx operand class and the encoder is given only
      // $dst, so the count register is implicit in the emitted salq.
      // The flags register is declared as killed.
 9299 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9300 %{
 9301   predicate(!VM_Version::supports_bmi2());
 9302   match(Set dst (LShiftL dst shift));
 9303   effect(KILL cr);
 9304 
 9305   format %{ "salq    $dst, $shift" %}
 9306   ins_encode %{
 9307     __ salq($dst$$Register);
 9308   %}
 9309   ins_pipe(ialu_reg_reg);
 9310 %}
 9311 
 9312 // Shift Left by variable
      // Memory read-modify-write form, selected only when BMI2 is not
      // available: matches storing (LoadL $dst) << $shift back to $dst. The
      // count is constrained to the rcx operand class and is implicit in the
      // emitted salq on the address. Flags are declared killed.
 9313 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9314 %{
 9315   predicate(!VM_Version::supports_bmi2());
 9316   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 9317   effect(KILL cr);
 9318 
 9319   format %{ "salq    $dst, $shift" %}
 9320   ins_encode %{
 9321     __ salq($dst$$Address);
 9322   %}
 9323   ins_pipe(ialu_mem_reg);
 9324 %}
 9325 
 9326 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9327 %{
        // BMI2 variable long left shift: three-operand shlxq with the count in
        // any int register and a destination separate from the source.
        // No flags operand or KILL effect is declared for this form.
 9328   predicate(VM_Version::supports_bmi2());
 9329   match(Set dst (LShiftL src shift));
 9330 
 9331   format %{ "shlxq   $dst, $src, $shift" %}
 9332   ins_encode %{
 9333     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
 9334   %}
 9335   ins_pipe(ialu_reg_reg);
 9336 %}
 9337 
 9338 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9339 %{
        // BMI2 variable long left shift with the shifted value loaded from
        // memory: the LoadL is folded into shlxq's address operand.
        // No flags operand or KILL effect is declared for this form.
 9340   predicate(VM_Version::supports_bmi2());
 9341   match(Set dst (LShiftL (LoadL src) shift));
 9342   ins_cost(175);
 9343   format %{ "shlxq   $dst, $src, $shift" %}
 9344   ins_encode %{
 9345     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
 9346   %}
 9347   ins_pipe(ialu_reg_mem);
 9348 %}
 9349 
 9350 // Arithmetic Shift Right by immediate
      // Long arithmetic right shift of a register in place by an int constant.
      // The operand is a general immI (not immI8); the encoder masks the count
      // to its low six bits (& 0x3F) before emitting sarq $dst, imm.
      // The flags register is declared as killed.
 9351 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
 9352 %{
 9353   match(Set dst (RShiftL dst shift));
 9354   effect(KILL cr);
 9355 
 9356   format %{ "sarq    $dst, $shift" %}
 9357   ins_encode %{
 9358     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
 9359   %}
        // Register destination: use the register pipeline class, as the sibling
        // rules salL_rReg_imm and shrL_rReg_imm do (was ialu_mem_imm).
 9360   ins_pipe(ialu_reg);
 9361 %}
 9362 
 9363 // Arithmetic Shift Right by immediate
      // Memory form: matches storing (LoadL $dst) >> $shift (RShiftL) back to
      // the same memory operand. The operand is a general immI (not immI8); the
      // encoder masks the count to its low six bits (& 0x3F) before emitting
      // sarq on the address. The flags register is declared as killed.
 9364 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
 9365 %{
 9366   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 9367   effect(KILL cr);
 9368 
 9369   format %{ "sarq    $dst, $shift" %}
 9370   ins_encode %{
 9371     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
 9372   %}
 9373   ins_pipe(ialu_mem_imm);
 9374 %}
 9375 
 9376 // Arithmetic Shift Right by variable
      // Selected only when BMI2 is not available. The (int) count is
      // constrained to the rcx operand class and the encoder is given only
      // $dst, so the count register is implicit in the emitted sarq.
      // The flags register is declared as killed.
 9377 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9378 %{
 9379   predicate(!VM_Version::supports_bmi2());
 9380   match(Set dst (RShiftL dst shift));
 9381   effect(KILL cr);
 9382 
 9383   format %{ "sarq    $dst, $shift" %}
 9384   ins_encode %{
 9385     __ sarq($dst$$Register);
 9386   %}
 9387   ins_pipe(ialu_reg_reg);
 9388 %}
 9389 
 9390 // Arithmetic Shift Right by variable
      // Memory read-modify-write form, selected only when BMI2 is not
      // available: matches storing (LoadL $dst) >> $shift (RShiftL) back to
      // $dst. The count is constrained to the rcx operand class and is implicit
      // in the emitted sarq on the address. Flags are declared killed.
 9391 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9392 %{
 9393   predicate(!VM_Version::supports_bmi2());
 9394   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 9395   effect(KILL cr);
 9396 
 9397   format %{ "sarq    $dst, $shift" %}
 9398   ins_encode %{
 9399     __ sarq($dst$$Address);
 9400   %}
 9401   ins_pipe(ialu_mem_reg);
 9402 %}
 9403 
 9404 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9405 %{
        // BMI2 variable long arithmetic right shift: three-operand sarxq with
        // the count in any int register and a destination separate from the
        // source. No flags operand or KILL effect is declared for this form.
 9406   predicate(VM_Version::supports_bmi2());
 9407   match(Set dst (RShiftL src shift));
 9408 
 9409   format %{ "sarxq   $dst, $src, $shift" %}
 9410   ins_encode %{
 9411     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
 9412   %}
 9413   ins_pipe(ialu_reg_reg);
 9414 %}
 9415 
 9416 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9417 %{
        // BMI2 variable long arithmetic right shift with the shifted value
        // loaded from memory: the LoadL is folded into sarxq's address operand.
        // No flags operand or KILL effect is declared for this form.
 9418   predicate(VM_Version::supports_bmi2());
 9419   match(Set dst (RShiftL (LoadL src) shift));
 9420   ins_cost(175);
 9421   format %{ "sarxq   $dst, $src, $shift" %}
 9422   ins_encode %{
 9423     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
 9424   %}
 9425   ins_pipe(ialu_reg_mem);
 9426 %}
 9427 
 9428 // Logical Shift Right by 8-bit immediate
      // Long logical (unsigned) right shift of a register in place by an immI8
      // constant; emitted as shrq $dst, imm. Flags are declared killed.
 9429 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 9430 %{
 9431   match(Set dst (URShiftL dst shift));
 9432   effect(KILL cr);
 9433 
 9434   format %{ "shrq    $dst, $shift" %}
 9435   ins_encode %{
 9436     __ shrq($dst$$Register, $shift$$constant);
 9437   %}
 9438   ins_pipe(ialu_reg);
 9439 %}
 9440 
 9441 // Logical Shift Right by 8-bit immediate
      // Memory form: matches storing (LoadL $dst) >>> $shift (URShiftL) back to
      // the same memory operand and emits shrq directly on the address.
      // The flags register is declared as killed.
 9442 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9443 %{
 9444   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 9445   effect(KILL cr);
 9446 
 9447   format %{ "shrq    $dst, $shift" %}
 9448   ins_encode %{
 9449     __ shrq($dst$$Address, $shift$$constant);
 9450   %}
 9451   ins_pipe(ialu_mem_imm);
 9452 %}
 9453 
 9454 // Logical Shift Right by variable
      // Selected only when BMI2 is not available. The (int) count is
      // constrained to the rcx operand class and the encoder is given only
      // $dst, so the count register is implicit in the emitted shrq.
      // The flags register is declared as killed.
 9455 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9456 %{
 9457   predicate(!VM_Version::supports_bmi2());
 9458   match(Set dst (URShiftL dst shift));
 9459   effect(KILL cr);
 9460 
 9461   format %{ "shrq    $dst, $shift" %}
 9462   ins_encode %{
 9463     __ shrq($dst$$Register);
 9464   %}
 9465   ins_pipe(ialu_reg_reg);
 9466 %}
 9467 
 9468 // Logical Shift Right by variable
      // Memory read-modify-write form, selected only when BMI2 is not
      // available: matches storing (LoadL $dst) >>> $shift (URShiftL) back to
      // $dst. The count is constrained to the rcx operand class and is implicit
      // in the emitted shrq on the address. Flags are declared killed.
 9469 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9470 %{
 9471   predicate(!VM_Version::supports_bmi2());
 9472   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 9473   effect(KILL cr);
 9474 
 9475   format %{ "shrq    $dst, $shift" %}
 9476   ins_encode %{
 9477     __ shrq($dst$$Address);
 9478   %}
 9479   ins_pipe(ialu_mem_reg);
 9480 %}
 9481 
 9482 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9483 %{
        // BMI2 variable long logical right shift: three-operand shrxq with the
        // count in any int register and a destination separate from the source.
        // No flags operand or KILL effect is declared for this form.
 9484   predicate(VM_Version::supports_bmi2());
 9485   match(Set dst (URShiftL src shift));
 9486 
 9487   format %{ "shrxq   $dst, $src, $shift" %}
 9488   ins_encode %{
 9489     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
 9490   %}
 9491   ins_pipe(ialu_reg_reg);
 9492 %}
 9493 
 9494 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9495 %{
        // BMI2 variable long logical right shift with the shifted value loaded
        // from memory: the LoadL is folded into shrxq's address operand.
        // No flags operand or KILL effect is declared for this form.
 9496   predicate(VM_Version::supports_bmi2());
 9497   match(Set dst (URShiftL (LoadL src) shift));
 9498   ins_cost(175);
 9499   format %{ "shrxq   $dst, $src, $shift" %}
 9500   ins_encode %{
 9501     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
 9502   %}
 9503   ins_pipe(ialu_reg_mem);
 9504 %}
 9505 
 9506 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 9507 // This idiom is used by the compiler for the i2b bytecode.
      // The whole (src << 24) >> 24 pattern is replaced by a single movsbl
      // from $src to $dst; no flags effect is declared.
 9508 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
 9509 %{
 9510   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 9511 
 9512   format %{ "movsbl  $dst, $src\t# i2b" %}
 9513   ins_encode %{
 9514     __ movsbl($dst$$Register, $src$$Register);
 9515   %}
 9516   ins_pipe(ialu_reg_reg);
 9517 %}
 9518 
 9519 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 9520 // This idiom is used by the compiler for the i2s bytecode.
      // The whole (src << 16) >> 16 pattern is replaced by a single movswl
      // from $src to $dst; no flags effect is declared.
 9521 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
 9522 %{
 9523   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 9524 
 9525   format %{ "movswl  $dst, $src\t# i2s" %}
 9526   ins_encode %{
 9527     __ movswl($dst$$Register, $src$$Register);
 9528   %}
 9529   ins_pipe(ialu_reg_reg);
 9530 %}
 9531 
 9532 // ROL/ROR instructions
 9533 
 9534 // Rotate left by constant.
      // Non-BMI2 int form: RotateLeft is shared between int and long, so the
      // predicate also checks that the node's type is T_INT. Rotates $dst in
      // place with roll $dst, imm; the flags register is declared as killed.
 9535 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
 9536 %{
 9537   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9538   match(Set dst (RotateLeft dst shift));
 9539   effect(KILL cr);
 9540   format %{ "roll    $dst, $shift" %}
 9541   ins_encode %{
 9542     __ roll($dst$$Register, $shift$$constant);
 9543   %}
 9544   ins_pipe(ialu_reg);
 9545 %}
 9546 
 9547 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
 9548 %{
        // BMI2 int rotate-left by constant into a separate destination. The
        // encoder emits rorxl (a rotate right), so the left-rotate count c is
        // converted to 32 - (c & 31). "rolxl" in the format string is only a
        // display name. No flags effect is declared.
        // NOTE(review): when (c & 31) == 0 the value passed to rorxl is 32 --
        // confirm the assembler/hardware treats that as a rotate by 0.
 9549   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9550   match(Set dst (RotateLeft src shift));
 9551   format %{ "rolxl   $dst, $src, $shift" %}
 9552   ins_encode %{
 9553     int shift = 32 - ($shift$$constant & 31);
 9554     __ rorxl($dst$$Register, $src$$Register, shift);
 9555   %}
 9556   ins_pipe(ialu_reg_reg);
 9557 %}
 9558 
 9559 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
 9560 %{
        // BMI2 int rotate-left by constant with the rotated value loaded from
        // memory: the LoadI is folded into rorxl's address operand. As in
        // rolI_immI8, the left-rotate count c is converted to the right-rotate
        // count 32 - (c & 31). No flags effect is declared.
 9561   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9562   match(Set dst (RotateLeft (LoadI src) shift));
 9563   ins_cost(175);
 9564   format %{ "rolxl   $dst, $src, $shift" %}
 9565   ins_encode %{
 9566     int shift = 32 - ($shift$$constant & 31);
 9567     __ rorxl($dst$$Register, $src$$Address, shift);
 9568   %}
 9569   ins_pipe(ialu_reg_mem);
 9570 %}
 9571 
 9572 // Rotate Left by variable
      // Int rotate-left with the count constrained to the rcx operand class;
      // the encoder is given only $dst, so the count register is implicit in
      // the emitted roll. The predicate checks only for T_INT (no BMI2
      // condition). The flags register is declared as killed.
 9573 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9574 %{
 9575   predicate(n->bottom_type()->basic_type() == T_INT);
 9576   match(Set dst (RotateLeft dst shift));
 9577   effect(KILL cr);
 9578   format %{ "roll    $dst, $shift" %}
 9579   ins_encode %{
 9580     __ roll($dst$$Register);
 9581   %}
 9582   ins_pipe(ialu_reg_reg);
 9583 %}
 9584 
 9585 // Rotate Right by constant.
      // Non-BMI2 int form: RotateRight is shared between int and long, so the
      // predicate also checks that the node's type is T_INT. Rotates $dst in
      // place with rorl $dst, imm; the flags register is declared as killed.
 9586 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
 9587 %{
 9588   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9589   match(Set dst (RotateRight dst shift));
 9590   effect(KILL cr);
 9591   format %{ "rorl    $dst, $shift" %}
 9592   ins_encode %{
 9593     __ rorl($dst$$Register, $shift$$constant);
 9594   %}
 9595   ins_pipe(ialu_reg);
 9596 %}
 9597 
 9598 // Rotate Right by constant.
      // BMI2 int form: three-operand rorxl with a destination separate from
      // the source; the constant is passed through unchanged.
      // No flags effect is declared.
 9599 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
 9600 %{
 9601   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9602   match(Set dst (RotateRight src shift));
 9603   format %{ "rorxl   $dst, $src, $shift" %}
 9604   ins_encode %{
 9605     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
 9606   %}
 9607   ins_pipe(ialu_reg_reg);
 9608 %}
 9609 
 9610 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
 9611 %{
        // BMI2 int rotate-right by constant with the rotated value loaded from
        // memory: the LoadI is folded into rorxl's address operand and the
        // constant is passed through unchanged. No flags effect is declared.
 9612   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9613   match(Set dst (RotateRight (LoadI src) shift));
 9614   ins_cost(175);
 9615   format %{ "rorxl   $dst, $src, $shift" %}
 9616   ins_encode %{
 9617     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
 9618   %}
 9619   ins_pipe(ialu_reg_mem);
 9620 %}
 9621 
 9622 // Rotate Right by variable
 9623 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9624 %{
 9625   predicate(n->bottom_type()->basic_type() == T_INT);
 9626   match(Set dst (RotateRight dst shift));
 9627   effect(KILL cr);
 9628   format %{ "rorl    $dst, $shift" %}
 9629   ins_encode %{
 9630     __ rorl($dst$$Register);
 9631   %}
 9632   ins_pipe(ialu_reg_reg);
 9633 %}
 9634 
 9635 // Rotate Left by constant.
 9636 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
 9637 %{
 9638   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9639   match(Set dst (RotateLeft dst shift));
 9640   effect(KILL cr);
 9641   format %{ "rolq    $dst, $shift" %}
 9642   ins_encode %{
 9643     __ rolq($dst$$Register, $shift$$constant);
 9644   %}
 9645   ins_pipe(ialu_reg);
 9646 %}
 9647 
 9648 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
 9649 %{
 9650   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9651   match(Set dst (RotateLeft src shift));
 9652   format %{ "rolxq   $dst, $src, $shift" %}
 9653   ins_encode %{
 9654     int shift = 64 - ($shift$$constant & 63);
 9655     __ rorxq($dst$$Register, $src$$Register, shift);
 9656   %}
 9657   ins_pipe(ialu_reg_reg);
 9658 %}
 9659 
 9660 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
 9661 %{
 9662   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9663   match(Set dst (RotateLeft (LoadL src) shift));
 9664   ins_cost(175);
 9665   format %{ "rolxq   $dst, $src, $shift" %}
 9666   ins_encode %{
 9667     int shift = 64 - ($shift$$constant & 63);
 9668     __ rorxq($dst$$Register, $src$$Address, shift);
 9669   %}
 9670   ins_pipe(ialu_reg_mem);
 9671 %}
 9672 
 9673 // Rotate Left by variable
 9674 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9675 %{
 9676   predicate(n->bottom_type()->basic_type() == T_LONG);
 9677   match(Set dst (RotateLeft dst shift));
 9678   effect(KILL cr);
 9679   format %{ "rolq    $dst, $shift" %}
 9680   ins_encode %{
 9681     __ rolq($dst$$Register);
 9682   %}
 9683   ins_pipe(ialu_reg_reg);
 9684 %}
 9685 
 9686 // Rotate Right by constant.
 9687 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
 9688 %{
 9689   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9690   match(Set dst (RotateRight dst shift));
 9691   effect(KILL cr);
 9692   format %{ "rorq    $dst, $shift" %}
 9693   ins_encode %{
 9694     __ rorq($dst$$Register, $shift$$constant);
 9695   %}
 9696   ins_pipe(ialu_reg);
 9697 %}
 9698 
 9699 // Rotate Right by constant
 9700 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
 9701 %{
 9702   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9703   match(Set dst (RotateRight src shift));
 9704   format %{ "rorxq   $dst, $src, $shift" %}
 9705   ins_encode %{
 9706     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
 9707   %}
 9708   ins_pipe(ialu_reg_reg);
 9709 %}
 9710 
 9711 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
 9712 %{
 9713   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9714   match(Set dst (RotateRight (LoadL src) shift));
 9715   ins_cost(175);
 9716   format %{ "rorxq   $dst, $src, $shift" %}
 9717   ins_encode %{
 9718     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
 9719   %}
 9720   ins_pipe(ialu_reg_mem);
 9721 %}
 9722 
 9723 // Rotate Right by variable
 9724 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9725 %{
 9726   predicate(n->bottom_type()->basic_type() == T_LONG);
 9727   match(Set dst (RotateRight dst shift));
 9728   effect(KILL cr);
 9729   format %{ "rorq    $dst, $shift" %}
 9730   ins_encode %{
 9731     __ rorq($dst$$Register);
 9732   %}
 9733   ins_pipe(ialu_reg_reg);
 9734 %}
 9735 
 9736 //----------------------------- CompressBits/ExpandBits ------------------------
 9737 
 9738 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
 9739   predicate(n->bottom_type()->isa_long());
 9740   match(Set dst (CompressBits src mask));
 9741   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
 9742   ins_encode %{
 9743     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
 9744   %}
 9745   ins_pipe( pipe_slow );
 9746 %}
 9747 
 9748 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
 9749   predicate(n->bottom_type()->isa_long());
 9750   match(Set dst (ExpandBits src mask));
 9751   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
 9752   ins_encode %{
 9753     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
 9754   %}
 9755   ins_pipe( pipe_slow );
 9756 %}
 9757 
 9758 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
 9759   predicate(n->bottom_type()->isa_long());
 9760   match(Set dst (CompressBits src (LoadL mask)));
 9761   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
 9762   ins_encode %{
 9763     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
 9764   %}
 9765   ins_pipe( pipe_slow );
 9766 %}
 9767 
 9768 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
 9769   predicate(n->bottom_type()->isa_long());
 9770   match(Set dst (ExpandBits src (LoadL mask)));
 9771   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
 9772   ins_encode %{
 9773     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
 9774   %}
 9775   ins_pipe( pipe_slow );
 9776 %}
 9777 
 9778 
 9779 // Logical Instructions
 9780 
 9781 // Integer Logical Instructions
 9782 
 9783 // And Instructions
 9784 // And Register with Register
 9785 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9786 %{
 9787   match(Set dst (AndI dst src));
 9788   effect(KILL cr);
 9789 
 9790   format %{ "andl    $dst, $src\t# int" %}
 9791   ins_encode %{
 9792     __ andl($dst$$Register, $src$$Register);
 9793   %}
 9794   ins_pipe(ialu_reg_reg);
 9795 %}
 9796 
 9797 // And Register with Immediate 255
 9798 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
 9799 %{
 9800   match(Set dst (AndI src mask));
 9801 
 9802   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
 9803   ins_encode %{
 9804     __ movzbl($dst$$Register, $src$$Register);
 9805   %}
 9806   ins_pipe(ialu_reg);
 9807 %}
 9808 
 9809 // And Register with Immediate 255 and promote to long
 9810 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
 9811 %{
 9812   match(Set dst (ConvI2L (AndI src mask)));
 9813 
 9814   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
 9815   ins_encode %{
 9816     __ movzbl($dst$$Register, $src$$Register);
 9817   %}
 9818   ins_pipe(ialu_reg);
 9819 %}
 9820 
 9821 // And Register with Immediate 65535
 9822 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
 9823 %{
 9824   match(Set dst (AndI src mask));
 9825 
 9826   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
 9827   ins_encode %{
 9828     __ movzwl($dst$$Register, $src$$Register);
 9829   %}
 9830   ins_pipe(ialu_reg);
 9831 %}
 9832 
 9833 // And Register with Immediate 65535 and promote to long
 9834 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
 9835 %{
 9836   match(Set dst (ConvI2L (AndI src mask)));
 9837 
 9838   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
 9839   ins_encode %{
 9840     __ movzwl($dst$$Register, $src$$Register);
 9841   %}
 9842   ins_pipe(ialu_reg);
 9843 %}
 9844 
 9845 // Can skip int2long conversions after AND with small bitmask
 9846 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
 9847 %{
 9848   predicate(VM_Version::supports_bmi2());
 9849   ins_cost(125);
 9850   effect(TEMP tmp, KILL cr);
 9851   match(Set dst (ConvI2L (AndI src mask)));
 9852   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
 9853   ins_encode %{
 9854     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
 9855     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
 9856   %}
 9857   ins_pipe(ialu_reg_reg);
 9858 %}
 9859 
 9860 // And Register with Immediate
 9861 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9862 %{
 9863   match(Set dst (AndI dst src));
 9864   effect(KILL cr);
 9865 
 9866   format %{ "andl    $dst, $src\t# int" %}
 9867   ins_encode %{
 9868     __ andl($dst$$Register, $src$$constant);
 9869   %}
 9870   ins_pipe(ialu_reg);
 9871 %}
 9872 
 9873 // And Register with Memory
 9874 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9875 %{
 9876   match(Set dst (AndI dst (LoadI src)));
 9877   effect(KILL cr);
 9878 
 9879   ins_cost(150);
 9880   format %{ "andl    $dst, $src\t# int" %}
 9881   ins_encode %{
 9882     __ andl($dst$$Register, $src$$Address);
 9883   %}
 9884   ins_pipe(ialu_reg_mem);
 9885 %}
 9886 
 9887 // And Memory with Register
 9888 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9889 %{
 9890   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
 9891   effect(KILL cr);
 9892 
 9893   ins_cost(150);
 9894   format %{ "andb    $dst, $src\t# byte" %}
 9895   ins_encode %{
 9896     __ andb($dst$$Address, $src$$Register);
 9897   %}
 9898   ins_pipe(ialu_mem_reg);
 9899 %}
 9900 
 9901 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9902 %{
 9903   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 9904   effect(KILL cr);
 9905 
 9906   ins_cost(150);
 9907   format %{ "andl    $dst, $src\t# int" %}
 9908   ins_encode %{
 9909     __ andl($dst$$Address, $src$$Register);
 9910   %}
 9911   ins_pipe(ialu_mem_reg);
 9912 %}
 9913 
 9914 // And Memory with Immediate
 9915 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9916 %{
 9917   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 9918   effect(KILL cr);
 9919 
 9920   ins_cost(125);
 9921   format %{ "andl    $dst, $src\t# int" %}
 9922   ins_encode %{
 9923     __ andl($dst$$Address, $src$$constant);
 9924   %}
 9925   ins_pipe(ialu_mem_imm);
 9926 %}
 9927 
 9928 // BMI1 instructions
 9929 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
 9930   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
 9931   predicate(UseBMI1Instructions);
 9932   effect(KILL cr);
 9933 
 9934   ins_cost(125);
 9935   format %{ "andnl  $dst, $src1, $src2" %}
 9936 
 9937   ins_encode %{
 9938     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 9939   %}
 9940   ins_pipe(ialu_reg_mem);
 9941 %}
 9942 
 9943 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
 9944   match(Set dst (AndI (XorI src1 minus_1) src2));
 9945   predicate(UseBMI1Instructions);
 9946   effect(KILL cr);
 9947 
 9948   format %{ "andnl  $dst, $src1, $src2" %}
 9949 
 9950   ins_encode %{
 9951     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 9952   %}
 9953   ins_pipe(ialu_reg);
 9954 %}
 9955 
 9956 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
 9957   match(Set dst (AndI (SubI imm_zero src) src));
 9958   predicate(UseBMI1Instructions);
 9959   effect(KILL cr);
 9960 
 9961   format %{ "blsil  $dst, $src" %}
 9962 
 9963   ins_encode %{
 9964     __ blsil($dst$$Register, $src$$Register);
 9965   %}
 9966   ins_pipe(ialu_reg);
 9967 %}
 9968 
 9969 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
 9970   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 9971   predicate(UseBMI1Instructions);
 9972   effect(KILL cr);
 9973 
 9974   ins_cost(125);
 9975   format %{ "blsil  $dst, $src" %}
 9976 
 9977   ins_encode %{
 9978     __ blsil($dst$$Register, $src$$Address);
 9979   %}
 9980   ins_pipe(ialu_reg_mem);
 9981 %}
 9982 
 9983 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
 9984 %{
 9985   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
 9986   predicate(UseBMI1Instructions);
 9987   effect(KILL cr);
 9988 
 9989   ins_cost(125);
 9990   format %{ "blsmskl $dst, $src" %}
 9991 
 9992   ins_encode %{
 9993     __ blsmskl($dst$$Register, $src$$Address);
 9994   %}
 9995   ins_pipe(ialu_reg_mem);
 9996 %}
 9997 
 9998 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
 9999 %{
10000   match(Set dst (XorI (AddI src minus_1) src));
10001   predicate(UseBMI1Instructions);
10002   effect(KILL cr);
10003 
10004   format %{ "blsmskl $dst, $src" %}
10005 
10006   ins_encode %{
10007     __ blsmskl($dst$$Register, $src$$Register);
10008   %}
10009 
10010   ins_pipe(ialu_reg);
10011 %}
10012 
// Int (src + -1) & src on a register operand, emitted as blsrl when
// UseBMI1Instructions is set. Kills the flags.
//
// This rule has no memory operand, so it is assigned the register-only
// pipeline class ialu_reg, matching the other register forms of the BMI1
// rules (blsiI_rReg_rReg, blsmskI_rReg_rReg) rather than ialu_reg_mem.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
10027 
// Int ([src] + -1) & [src] on a loaded memory operand, emitted as blsrl when
// UseBMI1Instructions is set. Kills the flags.
//
// This rule reads its source from memory, so it is assigned the
// register-memory pipeline class ialu_reg_mem, matching the other memory
// forms of the BMI1 rules (blsiI_rReg_mem, blsmskI_rReg_mem) rather than the
// register-only ialu_reg.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
10043 
10044 // Or Instructions
// Int OR, register with register: dst = dst | src. Kills the flags.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rsrc = $src$$Register;
    __ orl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
10057 
// Int OR, register with immediate: dst = dst | src. Kills the flags.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ orl(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10070 
// Int OR, register with a loaded memory operand: dst = dst | [src].
// Kills the flags.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ orl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10084 
// Byte OR, memory with register: load-and-store on the same address is
// folded into one read-modify-write orb. Kills the flags.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    const Register Rsrc = $src$$Register;
    __ orb($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10098 
// Int OR, memory with register: load-and-store on the same address is
// folded into one read-modify-write orl. Kills the flags.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    const Register Rsrc = $src$$Register;
    __ orl($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10111 
// Int OR, memory with immediate: read-modify-write orl on the address.
// Kills the flags.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{ __ orl($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
10125 
10126 // Xor Instructions
// Int XOR, register with register: dst = dst ^ src. Kills the flags.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rsrc = $src$$Register;
    __ xorl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
10139 
// Xor Register with Immediate -1
// XOR with all ones is a bitwise complement, so this is emitted as notl.
// No flags effect is declared for this rule.
// The format text names the mnemonic that is actually emitted (notl), in
// line with the long variant, which prints notq.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  format %{ "notl    $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10150 
// Int XOR, register with immediate: dst = dst ^ src. Kills the flags.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ xorl(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10163 
// Int XOR, register with a loaded memory operand: dst = dst ^ [src].
// Kills the flags.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ xorl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10177 
// Byte XOR, memory with register: load-and-store on the same address is
// folded into one read-modify-write xorb. Kills the flags.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    const Register Rsrc = $src$$Register;
    __ xorb($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10191 
// Int XOR, memory with register: load-and-store on the same address is
// folded into one read-modify-write xorl. Kills the flags.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    const Register Rsrc = $src$$Register;
    __ xorl($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10204 
// Int XOR, memory with immediate: read-modify-write xorl on the address.
// Kills the flags.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{ __ xorl($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
10218 
10219 
10220 // Long Logical Instructions
10221 
10222 // And Instructions
// Long AND, register with register: dst = dst & src. Kills the flags.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rsrc = $src$$Register;
    __ andq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
10235 
// Long AND with the constant 255, emitted as a zero-extending byte move.
// The 32-bit movzbl is sufficient: it zeroes out the upper 32 bits and does
// not need REX.W.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rsrc = $src$$Register;
    __ movzbl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10248 
// Long AND with the constant 65535, emitted as a zero-extending word move.
// The 32-bit movzwl is sufficient: it zeroes out the upper 32 bits and does
// not need REX.W.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rsrc = $src$$Register;
    __ movzwl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10261 
// Long AND, register with an immL32 immediate: dst = dst & src.
// Kills the flags.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ andq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10274 
// Long AND, register with a loaded memory operand: dst = dst & [src].
// Kills the flags.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ andq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10288 
// Long AND, memory with register: load-and-store on the same address is
// folded into one read-modify-write andq. Kills the flags.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    const Register Rsrc = $src$$Register;
    __ andq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10302 
// Long AND, memory with an immL32 immediate: read-modify-write andq on the
// address. Kills the flags.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{ __ andq($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
10316 
// Long AND of memory with a constant whose complement is a power of two,
// emitted as a single bit-reset (btrq) on the address. Kills the flags.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));

  // Only take constants that are genuinely 64-bit (given that not(con) is a
  // power of 2, its bit index must exceed 30); AND/OR works well enough for
  // 8/32-bit values.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    const int bit_index = log2i_exact((julong)~$con$$constant);
    __ btrq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
10333 
10334 // BMI1 instructions
// Long and-not with a memory operand: dst = (src1 ^ -1) & [src2], emitted as
// andnq when UseBMI1Instructions is set. Kills the flags.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(UseBMI1Instructions);
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  effect(KILL cr);
  ins_cost(125);

  format %{ "andnq  $dst, $src1, $src2" %}
  ins_encode %{
    const Register Rdst  = $dst$$Register;
    const Register Rsrc1 = $src1$$Register;
    __ andnq(Rdst, Rsrc1, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10348 
// Long and-not, all register operands: dst = (src1 ^ -1) & src2, emitted as
// andnq when UseBMI1Instructions is set. Kills the flags.
//
// This rule has no memory operand, so it is assigned the register-only
// pipeline class ialu_reg, the same class the int variant
// andnI_rReg_rReg_rReg uses, rather than ialu_reg_mem.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10361 
// Long (0 - src) & src on a register operand, emitted as blsiq when
// UseBMI1Instructions is set. Kills the flags.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  predicate(UseBMI1Instructions);
  match(Set dst (AndL (SubL imm_zero src) src));
  effect(KILL cr);

  format %{ "blsiq  $dst, $src" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rsrc = $src$$Register;
    __ blsiq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10374 
// Long (0 - [src]) & [src] on a loaded memory operand, emitted as blsiq when
// UseBMI1Instructions is set. Kills the flags.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  predicate(UseBMI1Instructions);
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  effect(KILL cr);
  ins_cost(125);

  format %{ "blsiq  $dst, $src" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ blsiq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10388 
// Long ([src] + -1) ^ [src] on a loaded memory operand, emitted as blsmskq
// when UseBMI1Instructions is set. Kills the flags.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  effect(KILL cr);
  ins_cost(125);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ blsmskq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10403 
// Long (src + -1) ^ src on a register operand, emitted as blsmskq when
// UseBMI1Instructions is set. Kills the flags.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (XorL (AddL src minus_1) src));
  effect(KILL cr);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rsrc = $src$$Register;
    __ blsmskq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10418 
// Long (src + -1) & src on a register operand, emitted as blsrq when
// UseBMI1Instructions is set. Kills the flags.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(UseBMI1Instructions);
  match(Set dst (AndL (AddL src minus_1) src) );
  effect(KILL cr);

  format %{ "blsrq  $dst, $src" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rsrc = $src$$Register;
    __ blsrq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
10433 
// Long ([src] + -1) & [src] on a loaded memory operand, emitted as blsrq
// when UseBMI1Instructions is set. Kills the flags.
//
// This rule reads its source from memory, so it is assigned the
// register-memory pipeline class ialu_reg_mem, matching the other memory
// forms of the long BMI1 rules (blsiL_rReg_mem, blsmskL_rReg_mem) rather
// than the register-only ialu_reg.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
10449 
10450 // Or Instructions
// Long OR, register with register: dst = dst | src. Kills the flags.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rsrc = $src$$Register;
    __ orq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
10463 
// Long OR of a register with a pointer reinterpreted through CastP2X.
// The pointer operand is any_RegP so that R15 (the TLS register) can be
// matched without spilling. Kills the flags.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rptr = $src$$Register;
    __ orq(Rdst, Rptr);
  %}
  ins_pipe(ialu_reg_reg);
%}
10475 
10476 
// Long OR, register with an immL32 immediate: dst = dst | src.
// Kills the flags.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10489 
// Long OR, register with a loaded memory operand: dst = dst | [src].
// Kills the flags.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10503 
// Long OR, memory with register: load-and-store on the same address is
// folded into one read-modify-write orq. Kills the flags.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    const Register Rsrc = $src$$Register;
    __ orq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10517 
// Long OR, memory with an immL32 immediate: read-modify-write orq on the
// address. Kills the flags.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{ __ orq($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
10531 
// Long OR of memory with a power-of-two constant, emitted as a single
// bit-set (btsq) on the address. Kills the flags.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) con)));

  // Only take constants that are a pure 64-bit power of 2 (bit index above
  // 31); AND/OR works well enough for 8/32-bit values.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
  effect(KILL cr);
  ins_cost(125);

  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    const int bit_index = log2i_exact((julong)$con$$constant);
    __ btsq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
10548 
10549 // Xor Instructions
// Long XOR, register with register: dst = dst ^ src. Kills the flags.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    const Register Rsrc = $src$$Register;
    __ xorq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
10562 
// Long XOR with the constant -1, i.e. a bitwise complement, emitted as notq.
// No flags effect is declared for this rule.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ notq(Rdst);
  %}
  ins_pipe(ialu_reg);
%}
10573 
// Long XOR, register with an immL32 immediate: dst = dst ^ src.
// Kills the flags.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10586 
// Long XOR, register with a loaded memory operand: dst = dst ^ [src].
// Kills the flags.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10600 
// Long XOR, memory with register: load-and-store on the same address is
// folded into one read-modify-write xorq. Kills the flags.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  ins_cost(150);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    const Register Rsrc = $src$$Register;
    __ xorq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
10614 
// Long XOR, memory with an immL32 immediate: read-modify-write xorq on the
// address. Kills the flags.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  ins_cost(125);
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{ __ xorq($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
10628 
// CmpLTMask of two int registers, computed without a branch:
// compare p with q, set the low byte of dst on the signed less condition,
// zero-extend that byte, then negate it. Kills the flags.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  ins_cost(400);
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_encode %{
    const Register Rmask = $dst$$Register;
    __ cmpl($p$$Register, $q$$Register);
    __ setb(Assembler::less, Rmask);
    __ movzbl(Rmask, Rmask);
    __ negl(Rmask);
  %}
  ins_pipe(pipe_slow);
%}
10647 
// CmpLTMask against the constant zero: an arithmetic right shift by 31
// applied to dst in place. Kills the flags.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  ins_cost(100);
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    const Register Rdst = $dst$$Register;
    __ sarl(Rdst, 31);
  %}
  ins_pipe(ialu_reg);
%}
10660 
// p = (CmpLTMask(p, q) & y) + (p - q), implemented with a short branch:
// subtract q from p and add y only when the subtraction did not leave the
// greater-or-equal condition set. Better to save a register than avoid a
// branch. Kills the flags.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  ins_cost(300);
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);

  format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
            "jge     done\n\t"
            "addl    $p,$y\n"
            "done:   " %}
  ins_encode %{
    const Register Racc = $p$$Register;
    Label skip_add;
    __ subl(Racc, $q$$Register);
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl(Racc, $y$$Register);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
10683 
/* Better to save a register than avoid a branch */
// Matches y = (p < q) mask AND y. Emitted as a compare followed by a short
// forward branch: y is left untouched when p is less than q and cleared
// with xorl otherwise.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
            "jlt     done\n\t"
            "xorl    $y, $y\n"
            "done:   " %}
  ins_encode %{
    Label keep_y;
    __ cmpl($p$$Register, $q$$Register);
    __ jccb(Assembler::less, keep_y);
    __ xorl($y$$Register, $y$$Register);
    __ bind(keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
10708 
10709 
10710 //---------- FP Instructions------------------------------------------------
10711 
// Really expensive, avoid
// Float compare into the unsigned flags register. After ucomiss the flags
// are patched by emit_cmpfp_fixup; per the format text that fixup runs only
// when the parity flag signals a NaN operand.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10730 
// Float compare of two registers into a rFlagsRegUCF result: a bare
// ucomiss with no flag fixup afterwards.
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
10741 
// Float compare of a register against a float loaded from memory, into a
// rFlagsRegUCF result. The load is folded into the ucomiss operand.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomiss(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
10752 
// Float compare of a register against a float constant, into a
// rFlagsRegUCF result. The constant is read from the constant table.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));
  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10762 
// Really expensive, avoid
// Double compare into the unsigned flags register. After ucomisd the flags
// are patched by emit_cmpfp_fixup; per the format text that fixup runs only
// when the parity flag signals a NaN operand.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10781 
// Double compare of two registers into a rFlagsRegUCF result: a bare
// ucomisd with no flag fixup afterwards.
// The format text previously carried a stray " test" suffix that no sibling
// rule has; it is dropped so the printed form matches the emitted
// instruction.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10792 
// Double compare of a register against a double loaded from memory, into a
// rFlagsRegUCF result. The load is folded into the ucomisd operand.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
10803 
// Double compare of a register against a double constant, into a
// rFlagsRegUCF result. The constant is read from the constant table.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10813 
// Three-way float compare (CmpF3) of two registers into -1,0,1.
// ucomiss sets the flags; emit_cmpfp3 turns them into the int result in dst.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10834 
// Three-way float compare (CmpF3) of a register against a float loaded from
// memory, into -1,0,1. The load is folded into the ucomiss operand.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomiss(lhs, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10855 
// Three-way float compare (CmpF3) of a register against a float constant
// from the constant table, into -1,0,1.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10875 
// Three-way double compare (CmpD3) of two registers into -1,0,1.
// ucomisd sets the flags; emit_cmpfp3 turns them into the int result in dst.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10896 
// Three-way double compare (CmpD3) of a register against a double loaded
// from memory, into -1,0,1. The load is folded into the ucomisd operand.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10917 
// Three-way double compare (CmpD3) of a register against a double constant
// from the constant table, into -1,0,1.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10937 
10938 //----------Arithmetic Conversion Instructions---------------------------------
10939 
// Float to double conversion, register to register (cvtss2sd).
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ cvtss2sd(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10950 
// Float to double conversion with the float load folded into cvtss2sd.
instruct convF2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtss2sd(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10961 
// Double to float conversion, register to register (cvtsd2ss).
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ cvtsd2ss(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10972 
// Double to float conversion with the double load folded into cvtsd2ss.
instruct convD2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsd2ss(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10983 
10984 // XXX do mem variants
// Float to int conversion. Delegates to the macro assembler's convertF2I
// helper with (T_INT, T_FLOAT); the flags register is killed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, result, input);
  %}
  ins_pipe(pipe_slow);
%}
10995 
// Float to long conversion. Delegates to the macro assembler's convertF2I
// helper with (T_LONG, T_FLOAT); the flags register is killed.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, result, input);
  %}
  ins_pipe(pipe_slow);
%}
11006 
// Double to int conversion. Delegates to the macro assembler's convertF2I
// helper with (T_INT, T_DOUBLE); the flags register is killed.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, result, input);
  %}
  ins_pipe(pipe_slow);
%}
11017 
// Double to long conversion. Delegates to the macro assembler's convertF2I
// helper with (T_LONG, T_DOUBLE); the flags register is killed.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, result, input);
  %}
  ins_pipe(pipe_slow);
%}
11028 
// RoundD of a double register into a long register via the macro
// assembler's round_double helper. dst, rtmp and rcx are all TEMPs and the
// flags register is killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ round_double(result, input, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11039 
// RoundF of a float register into an int register via the macro
// assembler's round_float helper. dst, rtmp and rcx are all TEMPs and the
// flags register is killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ round_float(result, input, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11050 
// Int to float conversion from a general register (cvtsi2ssl).
// Selected only when UseXmmI2F is off.
instruct convI2F_reg_reg(regF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    __ cvtsi2ssl(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11062 
// Int to float conversion with the int load folded into cvtsi2ssl.
instruct convI2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2ssl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11073 
// Int to double conversion from a general register (cvtsi2sdl).
// Selected only when UseXmmI2D is off.
instruct convI2D_reg_reg(regD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    __ cvtsi2sdl(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11085 
// Int to double conversion with the int load folded into cvtsi2sdl.
instruct convI2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2sdl(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11096 
// Int to float conversion used when UseXmmI2F is on: the int is first moved
// into the XMM destination with movdl and then converted in place with
// cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ movdl(result, $src$$Register);
    __ cvtdq2ps(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11110 
// Int to double conversion used when UseXmmI2D is on: the int is first moved
// into the XMM destination with movdl and then converted in place with
// cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ movdl(result, $src$$Register);
    __ cvtdq2pd(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11124 
// Long to float conversion from a general register (cvtsi2ssq).
instruct convL2F_reg_reg(regF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    __ cvtsi2ssq(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11135 
// Long to float conversion with the long load folded into cvtsi2ssq.
instruct convL2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2ssq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11146 
// Long to double conversion from a general register (cvtsi2sdq).
instruct convL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    __ cvtsi2sdq(result, input);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11157 
// Long to double conversion with the long load folded into cvtsi2sdq.
instruct convL2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    __ cvtsi2sdq(result, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11168 
// Int to long conversion, register to register, using movslq.
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    Register wide   = $dst$$Register;
    Register narrow = $src$$Register;
    __ movslq(wide, narrow);
  %}
  ins_pipe(ialu_reg_reg);
%}
11180 
11181 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11182 // %{
11183 //   match(Set dst (ConvI2L src));
11184 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11185 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11186 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11187 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11188 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11189 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11190 
11191 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11192 //   ins_encode(enc_copy(dst, src));
11193 // //   opcode(0x63); // needs REX.W
11194 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11195 //   ins_pipe(ialu_reg_reg);
11196 // %}
11197 
// Zero-extend convert int to long.
// Matches (ConvI2L src) AND-ed with an immL_32bits mask and implements it
// with a 32-bit register move. Nothing is emitted when dst and src were
// allocated to the same register.
// NOTE(review): the skipped move presumably relies on the upper 32 bits of
// an int register already being zero -- confirm against the allocator's
// guarantees.
// The format text no longer ends with a dangling "\n\t"; this rule prints a
// single line.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
11211 
// Zero-extend convert int to long, with the int load folded in.
// Matches (ConvI2L (LoadI src)) AND-ed with an immL_32bits mask and
// implements it with a single 32-bit load into dst.
// The format text no longer ends with a dangling "\n\t"; this rule prints a
// single line.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
11223 
// AND of a long register with an immL_32bits mask, implemented as a 32-bit
// register move into dst.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register input  = $src$$Register;
    __ movl(result, input);
  %}
  ins_pipe(ialu_reg_reg);
%}
11234 
// Long to int conversion, implemented as a 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    Register narrow = $dst$$Register;
    Register wide   = $src$$Register;
    __ movl(narrow, wide);
  %}
  ins_pipe(ialu_reg_reg);
%}
11245 
11246 
// MoveF2I from a float stack slot into an int register: a 32-bit load from
// the rsp-relative slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
11258 
// MoveI2F from an int stack slot into a float register: loads the
// rsp-relative slot with movflt.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
11270 
// MoveD2L from a double stack slot into a long register: a 64-bit load from
// the rsp-relative slot.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
11282 
// MoveL2D from a long stack slot into a double register, selected when
// UseXmmLoadAndClearUpper is off. Loads the rsp-relative slot with movdbl.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
11295 
// MoveL2D from a long stack slot into a double register, selected when
// UseXmmLoadAndClearUpper is on. Loads the rsp-relative slot with movdbl.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
11308 
11309 
// MoveF2I from a float register into an int stack slot: stores to the
// rsp-relative slot with movflt.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11321 
// MoveI2F from an int register into a float stack slot: a 32-bit store to
// the rsp-relative slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
11333 
// MoveD2L from a double register into a long stack slot: stores to the
// rsp-relative slot with movdbl.
// The format tag previously read "MoveL2D_reg_stack", which is the name of
// the opposite-direction rule; it now names this rule so printed assembly
// identifies the instruct that was actually selected.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11345 
// MoveL2D from a long register into a double stack slot: a 64-bit store to
// the rsp-relative slot.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11357 
// MoveF2I directly between registers: XMM source to general-purpose
// destination via movdl.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ movdl(result, input);
  %}
  ins_pipe( pipe_slow );
%}
11368 
// MoveD2L directly between registers: XMM source to general-purpose
// destination via movdq.
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    Register    result = $dst$$Register;
    XMMRegister input  = $src$$XMMRegister;
    __ movdq(result, input);
  %}
  ins_pipe( pipe_slow );
%}
11379 
// MoveI2F directly between registers: general-purpose source to XMM
// destination via movdl.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    __ movdl(result, input);
  %}
  ins_pipe( pipe_slow );
%}
11390 
// MoveL2D directly between registers: general-purpose source to XMM
// destination via movdq.
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    Register    input  = $src$$Register;
    __ movdq(result, input);
  %}
  ins_pipe( pipe_slow );
%}
11401 
// Fast clearing of an array
// Small ClearArray non-AVX512.
// Chosen when the ClearArray node is not large and UseAVX is at most 2.
// cnt (rcx) and base (rdi) are consumed and destroyed, zero (rax) and the
// flags are killed, and tmp is an XMM temporary. The work is done by the
// macro assembler's clear_mem helper, called with knoreg as the mask
// register argument.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const bool large = false;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, large, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
11462 
// Small ClearArray AVX512 non-constant length.
// Chosen when the ClearArray node is not large and UseAVX is above 2.
// Same register contract as the non-AVX512 rule, plus a kReg temporary
// (ktmp) that is handed to clear_mem as the mask register argument.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const bool large = false;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, large, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11523 
// Large ClearArray non-AVX512.
// Chosen when the ClearArray node is large and UseAVX is at most 2.
// cnt (rcx) and base (rdi) are consumed and destroyed, zero (rax) and the
// flags are killed, and tmp is an XMM temporary. clear_mem is called with
// its boolean argument set and knoreg as the mask register argument.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const bool large = true;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, large, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
11574 
// Large ClearArray AVX512.
// Chosen when the ClearArray node is large and UseAVX is above 2.
// Same register contract as the non-AVX512 large rule, plus a kReg
// temporary (ktmp) that is handed to clear_mem as the mask register
// argument.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const bool large = true;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, large, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11625 
11626 // Small ClearArray AVX512 constant length.
      // Selected for a ClearArray whose count is an immL operand, when the node is
      // not is_large(), UseAVX > 2 and VM_Version::supports_avx512vlbw() holds.
      // The count reaches clear_mem as $cnt$$constant instead of a register; zero,
      // tmp and ktmp are declared TEMP only.
11627 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
11628 %{
11629   predicate(!((ClearArrayNode*)n)->is_large() &&
11630               ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11631   match(Set dummy (ClearArray cnt base));
11632   ins_cost(100);
11633   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11634   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11635   ins_encode %{
11636    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11637   %}
11638   ins_pipe(pipe_slow);
11639 %}
11640 
11641 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11642                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11643 %{
        // StrComp with LL encoding, used when VM_Version::supports_avx512vlbw()
        // is false.  knoreg is passed as the last string_compare argument, the
        // slot where string_compareL_evex passes its kReg temporary.
11644   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11645   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11646   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11647 
11648   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11649   ins_encode %{
11650     __ string_compare($str1$$Register, $str2$$Register,
11651                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11652                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11653   %}
11654   ins_pipe( pipe_slow );
11655 %}
11656 
11657 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11658                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
11659 %{
        // StrComp with LL encoding, used when VM_Version::supports_avx512vlbw()
        // is true.  Same string_compare call as string_compareL, except that the
        // extra kReg temporary (ktmp) is passed as the final argument.
11660   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11661   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11662   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11663 
11664   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11665   ins_encode %{
11666     __ string_compare($str1$$Register, $str2$$Register,
11667                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11668                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11669   %}
11670   ins_pipe( pipe_slow );
11671 %}
11672 
11673 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11674                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11675 %{
        // StrComp with UU encoding, used when VM_Version::supports_avx512vlbw()
        // is false.  knoreg fills the final string_compare argument.
11676   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11677   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11678   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11679 
11680   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11681   ins_encode %{
11682     __ string_compare($str1$$Register, $str2$$Register,
11683                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11684                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11685   %}
11686   ins_pipe( pipe_slow );
11687 %}
11688 
11689 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11690                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
11691 %{
        // StrComp with UU encoding, used when VM_Version::supports_avx512vlbw()
        // is true.  Adds a kReg temporary (ktmp) that is handed to string_compare
        // as its final argument.
11692   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11693   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11694   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11695 
11696   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11697   ins_encode %{
11698     __ string_compare($str1$$Register, $str2$$Register,
11699                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11700                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11701   %}
11702   ins_pipe( pipe_slow );
11703 %}
11704 
11705 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11706                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
11707 %{
        // StrComp with LU encoding, used when VM_Version::supports_avx512vlbw()
        // is false.  knoreg fills the final string_compare argument.
        // NOTE(review): the format text says "byte[]" although the encoding is
        // LU -- confirm whether that wording is intentional.
11708   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11709   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11710   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11711 
11712   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11713   ins_encode %{
11714     __ string_compare($str1$$Register, $str2$$Register,
11715                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11716                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11717   %}
11718   ins_pipe( pipe_slow );
11719 %}
11720 
11721 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11722                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
11723 %{
        // StrComp with LU encoding, used when VM_Version::supports_avx512vlbw()
        // is true.  Adds a kReg temporary (ktmp) that is handed to string_compare
        // as its final argument.
11724   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11725   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11726   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11727 
11728   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11729   ins_encode %{
11730     __ string_compare($str1$$Register, $str2$$Register,
11731                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11732                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11733   %}
11734   ins_pipe( pipe_slow );
11735 %}
11736 
11737 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
11738                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
11739 %{
        // StrComp with UL encoding, used when VM_Version::supports_avx512vlbw()
        // is false.  Unlike the LL/UU/LU variants, str1/cnt1 use the rsi/rdx
        // operand classes and str2/cnt2 the rdi/rcx ones, and string_compare is
        // called with ($str2, $str1, $cnt2, $cnt1), i.e. the operands swapped.
11740   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11741   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11742   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11743 
11744   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11745   ins_encode %{
11746     __ string_compare($str2$$Register, $str1$$Register,
11747                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11748                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11749   %}
11750   ins_pipe( pipe_slow );
11751 %}
11752 
11753 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
11754                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
11755 %{
        // StrComp with UL encoding, used when VM_Version::supports_avx512vlbw()
        // is true.  As in string_compareUL, string_compare receives
        // ($str2, $str1, $cnt2, $cnt1); the kReg temporary ktmp is the final
        // argument.
11756   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11757   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11758   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11759 
11760   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11761   ins_encode %{
11762     __ string_compare($str2$$Register, $str1$$Register,
11763                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11764                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11765   %}
11766   ins_pipe( pipe_slow );
11767 %}
11768 
11769 // fast search of substring with known size.
      // StrIndexOf with LL encoding where the substring length is the immI
      // operand int_cnt2; requires UseSSE42Intrinsics.  cnt2 (rax_RegI) is not
      // part of the match rule: it is only declared KILL in effect() and handed
      // to the helper as a register.  Constants >= 16 go to string_indexofC8,
      // smaller ones to string_indexof.
11770 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11771                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11772 %{
11773   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11774   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11775   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11776 
11777   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
11778   ins_encode %{
11779     int icnt2 = (int)$int_cnt2$$constant;
11780     if (icnt2 >= 16) {
11781       // IndexOf for constant substrings with size >= 16 elements
11782       // which don't need to be loaded through stack.
11783       __ string_indexofC8($str1$$Register, $str2$$Register,
11784                           $cnt1$$Register, $cnt2$$Register,
11785                           icnt2, $result$$Register,
11786                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11787     } else {
11788       // Small strings are loaded through stack if they cross page boundary.
11789       __ string_indexof($str1$$Register, $str2$$Register,
11790                         $cnt1$$Register, $cnt2$$Register,
11791                         icnt2, $result$$Register,
11792                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11793     }
11794   %}
11795   ins_pipe( pipe_slow );
11796 %}
11797 
11798 // fast search of substring with known size.
      // StrIndexOf with UU encoding where the substring length is the immI
      // operand int_cnt2; requires UseSSE42Intrinsics.  cnt2 (rax_RegI) is only
      // declared KILL and passed to the helper as a register.  Constants >= 8 go
      // to string_indexofC8, smaller ones to string_indexof.
11799 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11800                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11801 %{
11802   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11803   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11804   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11805 
11806   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
11807   ins_encode %{
11808     int icnt2 = (int)$int_cnt2$$constant;
11809     if (icnt2 >= 8) {
11810       // IndexOf for constant substrings with size >= 8 elements
11811       // which don't need to be loaded through stack.
11812       __ string_indexofC8($str1$$Register, $str2$$Register,
11813                           $cnt1$$Register, $cnt2$$Register,
11814                           icnt2, $result$$Register,
11815                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11816     } else {
11817       // Small strings are loaded through stack if they cross page boundary.
11818       __ string_indexof($str1$$Register, $str2$$Register,
11819                         $cnt1$$Register, $cnt2$$Register,
11820                         icnt2, $result$$Register,
11821                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11822     }
11823   %}
11824   ins_pipe( pipe_slow );
11825 %}
11826 
11827 // fast search of substring with known size.
      // StrIndexOf with UL encoding where the substring length is the immI
      // operand int_cnt2; requires UseSSE42Intrinsics.  cnt2 (rax_RegI) is only
      // declared KILL and passed to the helper as a register.  Constants >= 8 go
      // to string_indexofC8, smaller ones to string_indexof.
11828 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11829                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11830 %{
11831   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11832   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11833   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11834 
11835   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
11836   ins_encode %{
11837     int icnt2 = (int)$int_cnt2$$constant;
11838     if (icnt2 >= 8) {
11839       // IndexOf for constant substrings with size >= 8 elements
11840       // which don't need to be loaded through stack.
11841       __ string_indexofC8($str1$$Register, $str2$$Register,
11842                           $cnt1$$Register, $cnt2$$Register,
11843                           icnt2, $result$$Register,
11844                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11845     } else {
11846       // Small strings are loaded through stack if they cross page boundary.
11847       __ string_indexof($str1$$Register, $str2$$Register,
11848                         $cnt1$$Register, $cnt2$$Register,
11849                         icnt2, $result$$Register,
11850                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11851     }
11852   %}
11853   ins_pipe( pipe_slow );
11854 %}
11855 
11856 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11857                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
11858 %{
        // StrIndexOf with LL encoding where the substring length cnt2 is a
        // register input; requires UseSSE42Intrinsics.  string_indexof is called
        // with (-1) in the slot where string_indexof_conL passes its constant
        // length.
11859   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11860   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11861   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11862 
11863   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11864   ins_encode %{
11865     __ string_indexof($str1$$Register, $str2$$Register,
11866                       $cnt1$$Register, $cnt2$$Register,
11867                       (-1), $result$$Register,
11868                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11869   %}
11870   ins_pipe( pipe_slow );
11871 %}
11872 
11873 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11874                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
11875 %{
        // StrIndexOf with UU encoding where the substring length cnt2 is a
        // register input; requires UseSSE42Intrinsics.  string_indexof is called
        // with (-1) in the slot where string_indexof_conU passes its constant
        // length.
11876   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11877   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11878   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11879 
11880   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11881   ins_encode %{
11882     __ string_indexof($str1$$Register, $str2$$Register,
11883                       $cnt1$$Register, $cnt2$$Register,
11884                       (-1), $result$$Register,
11885                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11886   %}
11887   ins_pipe( pipe_slow );
11888 %}
11889 
11890 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11891                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
11892 %{
        // StrIndexOf with UL encoding where the substring length cnt2 is a
        // register input; requires UseSSE42Intrinsics.  string_indexof is called
        // with (-1) in the slot where string_indexof_conUL passes its constant
        // length.
11893   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11894   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11895   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11896 
11897   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11898   ins_encode %{
11899     __ string_indexof($str1$$Register, $str2$$Register,
11900                       $cnt1$$Register, $cnt2$$Register,
11901                       (-1), $result$$Register,
11902                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11903   %}
11904   ins_pipe( pipe_slow );
11905 %}
11906 
11907 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
11908                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
11909 %{
        // StrIndexOfChar for encoding U; requires UseSSE42Intrinsics.  Forwards
        // the three inputs, the result register, three vector temporaries and
        // one integer temporary to string_indexof_char.
11910   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
11911   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11912   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11913   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11914   ins_encode %{
11915     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11916                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
11917   %}
11918   ins_pipe( pipe_slow );
11919 %}
11920 
11921 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
11922                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
11923 %{
        // StrIndexOfChar for encoding L; requires UseSSE42Intrinsics.  Same
        // operand list as string_indexof_char, but the encoding calls
        // stringL_indexof_char.
11924   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
11925   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11926   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11927   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11928   ins_encode %{
11929     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11930                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
11931   %}
11932   ins_pipe( pipe_slow );
11933 %}
11934 
11935 // fast string equals
      // StrEquals when VM_Version::supports_avx512vlbw() is false.  Encodes via
      // arrays_equals with a leading `false` argument (the array_equals*
      // instructs pass `true`), an explicit count register and knoreg as the
      // last argument.
11936 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11937                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11938 %{
11939   predicate(!VM_Version::supports_avx512vlbw());
11940   match(Set result (StrEquals (Binary str1 str2) cnt));
11941   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11942 
11943   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11944   ins_encode %{
11945     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11946                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11947                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11948   %}
11949   ins_pipe( pipe_slow );
11950 %}
11951 
11952 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11953                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
11954 %{
        // StrEquals when VM_Version::supports_avx512vlbw() is true.  Same
        // arrays_equals call as string_equals, with the kReg temporary ktmp as
        // the final argument instead of knoreg.
11955   predicate(VM_Version::supports_avx512vlbw());
11956   match(Set result (StrEquals (Binary str1 str2) cnt));
11957   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11958 
11959   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11960   ins_encode %{
11961     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11962                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11963                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11964   %}
11965   ins_pipe( pipe_slow );
11966 %}
11967 
11968 // fast array equals
      // AryEq with LL encoding when VM_Version::supports_avx512vlbw() is false.
      // Calls arrays_equals with a leading `true`; the killed temporary tmp3
      // (rcx_RegI) occupies the argument slot where string_equals passes its
      // count, and the char flag is false.
11969 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11970                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11971 %{
11972   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11973   match(Set result (AryEq ary1 ary2));
11974   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11975 
11976   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11977   ins_encode %{
11978     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11979                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11980                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11981   %}
11982   ins_pipe( pipe_slow );
11983 %}
11984 
11985 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11986                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11987 %{
        // AryEq with LL encoding when VM_Version::supports_avx512vlbw() is true.
        // Same arrays_equals call as array_equalsB, with the kReg temporary ktmp
        // as the final argument instead of knoreg.
11988   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11989   match(Set result (AryEq ary1 ary2));
11990   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11991 
11992   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11993   ins_encode %{
11994     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11995                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11996                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11997   %}
11998   ins_pipe( pipe_slow );
11999 %}
12000 
12001 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
12002                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
12003 %{
        // AryEq with UU encoding when VM_Version::supports_avx512vlbw() is false.
        // Differs from array_equalsB only in the encoding predicate, the format
        // text and the `true /* char */` flag passed to arrays_equals.
12004   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12005   match(Set result (AryEq ary1 ary2));
12006   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12007 
12008   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12009   ins_encode %{
12010     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12011                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12012                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12013   %}
12014   ins_pipe( pipe_slow );
12015 %}
12016 
12017 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
12018                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
12019 %{
        // AryEq with UU encoding when VM_Version::supports_avx512vlbw() is true.
        // Same arrays_equals call as array_equalsC, with the kReg temporary ktmp
        // as the final argument instead of knoreg.
12020   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12021   match(Set result (AryEq ary1 ary2));
12022   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12023 
12024   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12025   ins_encode %{
12026     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12027                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12028                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12029   %}
12030   ins_pipe( pipe_slow );
12031 %}
12032 
12033 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
12034                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
12035                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
12036                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
12037                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
12038 %{
        // VectorizedHashCode, enabled when UseAVX >= 2.  `result` appears both
        // as the Set destination and inside the second Binary of the match rule,
        // so it is an input as well as the output.  basic_type is an immU8
        // constant that the encoding casts to BasicType.  Thirteen vector and
        // three integer temporaries are forwarded to arrays_hashcode.
12039   predicate(UseAVX >= 2);
12040   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
12041   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
12042          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
12043          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
12044          USE basic_type, KILL cr);
12045 
12046   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
12047   ins_encode %{
12048     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
12049                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12050                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
12051                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
12052                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
12053                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
12054                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
12055   %}
12056   ins_pipe( pipe_slow );
12057 %}
12058 
12059 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
12060                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
12061 %{
        // CountPositives for the case where AVX512VL/BW or BMI2 is unavailable
        // (either VM_Version check false).  Both mask-register arguments of
        // count_positives are knoreg here; count_positives_evex passes real kReg
        // temporaries in those slots.
12062   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12063   match(Set result (CountPositives ary1 len));
12064   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12065 
12066   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12067   ins_encode %{
12068     __ count_positives($ary1$$Register, $len$$Register,
12069                        $result$$Register, $tmp3$$Register,
12070                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12071   %}
12072   ins_pipe( pipe_slow );
12073 %}
12074 
12075 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
12076                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
12077 %{
        // CountPositives for the case where both
        // VM_Version::supports_avx512vlbw() and VM_Version::supports_bmi2() hold.
        // Two kReg temporaries (ktmp1, ktmp2) are passed as the last two
        // count_positives arguments, where count_positives passes knoreg.
12078   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12079   match(Set result (CountPositives ary1 len));
12080   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12081 
12082   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12083   ins_encode %{
12084     __ count_positives($ary1$$Register, $len$$Register,
12085                        $result$$Register, $tmp3$$Register,
12086                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12087   %}
12088   ins_pipe( pipe_slow );
12089 %}
12090 
12091 // fast char[] to byte[] compression
      // StrCompressedCopy for the case where AVX512VL/BW or BMI2 is unavailable
      // (either VM_Version check false).  Forwards four vector temporaries, the
      // killed integer temporary tmp5 and the result register to
      // char_array_compress, with knoreg for both trailing mask arguments.
12092 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
12093                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
12094   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12095   match(Set result (StrCompressedCopy src (Binary dst len)));
12096   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
12097          USE_KILL len, KILL tmp5, KILL cr);
12098 
12099   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12100   ins_encode %{
12101     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12102                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12103                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12104                            knoreg, knoreg);
12105   %}
12106   ins_pipe( pipe_slow );
12107 %}
12108 
12109 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
12110                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // StrCompressedCopy for the case where both
        // VM_Version::supports_avx512vlbw() and VM_Version::supports_bmi2() hold.
        // Same char_array_compress call as string_compress, with the kReg
        // temporaries ktmp1/ktmp2 as the last two arguments instead of knoreg.
12111   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12112   match(Set result (StrCompressedCopy src (Binary dst len)));
12113   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
12114          USE_KILL len, KILL tmp5, KILL cr);
12115 
12116   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12117   ins_encode %{
12118     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12119                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12120                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12121                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12122   %}
12123   ins_pipe( pipe_slow );
12124 %}
12125 // fast byte[] to char[] inflation
      // StrInflatedCopy for the case where AVX512VL/BW or BMI2 is unavailable
      // (either VM_Version check false).  The match result is the Universe
      // operand `dummy`; the encoding calls byte_array_inflate with knoreg as
      // the last argument.
12126 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12127                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
12128   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12129   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12130   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12131 
12132   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12133   ins_encode %{
12134     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12135                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12136   %}
12137   ins_pipe( pipe_slow );
12138 %}
12139 
12140 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12141                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
        // StrInflatedCopy for the case where both
        // VM_Version::supports_avx512vlbw() and VM_Version::supports_bmi2() hold.
        // Same byte_array_inflate call as string_inflate, with the kReg
        // temporary ktmp as the last argument instead of knoreg.
12142   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12143   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12144   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12145 
12146   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12147   ins_encode %{
12148     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12149                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12150   %}
12151   ins_pipe( pipe_slow );
12152 %}
12153 
12154 // encode char[] to byte[] in ISO_8859_1
      // Matches EncodeISOArray nodes whose is_ascii() is false.  Shares the
      // encode_iso_array helper with encode_ascii_array; the only difference in
      // the call is the final boolean argument, which is `false` here.
12155 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12156                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
12157                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
12158   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12159   match(Set result (EncodeISOArray src (Binary dst len)));
12160   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12161 
12162   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
12163   ins_encode %{
12164     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12165                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12166                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12167   %}
12168   ins_pipe( pipe_slow );
12169 %}
12170 
12171 // encode char[] to byte[] in ASCII
      // Matches EncodeISOArray nodes whose is_ascii() is true.  Calls the same
      // encode_iso_array helper as the ISO_8859_1 instruct, with the final
      // boolean argument set to `true`.
12172 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12173                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
12174                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
12175   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12176   match(Set result (EncodeISOArray src (Binary dst len)));
12177   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12178 
12179   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
12180   ins_encode %{
12181     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12182                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12183                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12184   %}
12185   ins_pipe( pipe_slow );
12186 %}
12187 
12188 //----------Overflow Math Instructions-----------------------------------------
12189 
// Int add overflow check, register-register: the flags come from a real
// addl, so op1 is overwritten and is therefore declared USE_KILL.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}
  ins_encode %{ __ addl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
12202 
// Int add overflow check, register-immediate: addl of a constant into op1,
// which is overwritten (USE_KILL); only the flags are the result.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}
  ins_encode %{ __ addl($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_reg_reg);
%}
12215 
// Long add overflow check, register-register: addq into op1 (USE_KILL);
// only the resulting flags are consumed.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{ __ addq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
12227 
// Long add overflow check, register-immediate (immL32 constant): addq into
// op1 (USE_KILL); only the resulting flags are consumed.
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{ __ addq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_reg_reg);
%}
12239 
// Int subtract overflow check, register-register: emitted as cmpl, and no
// operand is declared killed.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
12250 
// Int subtract overflow check, register-immediate: emitted as cmpl against
// the constant, and no operand is declared killed.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_reg_reg);
%}
12261 
// Long subtract overflow check, register-register: emitted as cmpq, and no
// operand is declared killed.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
12272 
// Long subtract overflow check, register-immediate (immL32 constant):
// emitted as cmpq against the constant, and no operand is declared killed.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_reg_reg);
%}
12283 
// Int negate overflow check: matches OverflowSubI with a constant-zero left
// operand and emits negl on op2, which is overwritten (USE_KILL).
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2) %{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl    $op2\t# overflow check int" %}
  ins_encode %{ __ negl($op2$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
12295 
// Long negate overflow check: matches OverflowSubL with a constant-zero left
// operand and emits negq on op2, which is overwritten (USE_KILL).
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) %{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq    $op2\t# overflow check long" %}
  ins_encode %{ __ negq($op2$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
12307 
// Int multiply overflow check, register-register: two-operand imull into
// op1, which is overwritten (USE_KILL); only the flags are the result.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull    $op1, $op2\t# overflow check int" %}
  ins_encode %{ __ imull($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12319 
// Int multiply overflow check, register-immediate: three-operand imull writes
// the product into the TEMP register tmp, so op1 is only read (USE).
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{ __ imull($tmp$$Register, $op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12331 
// Long multiply overflow check, register-register: two-operand imulq into
// op1, which is overwritten (USE_KILL); only the flags are the result.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq    $op1, $op2\t# overflow check long" %}
  ins_encode %{ __ imulq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12343 
// Long multiply overflow check, register-immediate (immL32 constant):
// three-operand imulq writes the product into the TEMP register tmp, so op1
// is only read (USE).
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{ __ imulq($tmp$$Register, $op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12355 
12356 
12357 //----------Control Flow Instructions------------------------------------------
12358 // Signed compare Instructions
12359 
12360 // XXX more variants!!
// Signed int compare, register-register.  Also instantiated by name from the
// minI_rReg / maxI_rReg expand rules, hence the explicit effect list.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
12372 
// Signed int compare of a register against an int constant.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12383 
// Signed int compare of a register against an int in memory: the LoadI is
// folded into the memory operand of the cmpl.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12395 
// Int compare against constant zero, emitted as a testl of the register
// with itself.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpI src zero));

  format %{ "testl   $src, $src" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12406 
// (src & con) compared with zero, emitted as a single testl with an
// immediate; the AndI result is not written to any register.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  ins_encode %{ __ testl($src$$Register, $con$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12417 
// (src1 & src2) compared with zero, emitted as a single register-register
// testl; the AndI result is not written to any register.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero) %{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl   $src1, $src2" %}
  ins_encode %{ __ testl($src1$$Register, $src2$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12428 
// (src & int-in-memory) compared with zero: the LoadI is folded into the
// memory operand of the testl.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero) %{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  ins_encode %{ __ testl($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12439 
12440 // Unsigned compare Instructions; really, same as signed except they
12441 // produce an rFlagsRegU instead of rFlagsReg.
// compU_rReg: unsigned int compare, register-register form.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
12452 
// Unsigned int compare of a register against an int constant.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12463 
// Unsigned int compare of a register against an int in memory: the LoadI is
// folded into the memory operand of the cmpl.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12475 
12476 // // // Cisc-spilled version of cmpU_rReg
12477 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
12478 // //%{
12479 // //  match(Set cr (CmpU (LoadI op1) op2));
12480 // //
12481 // //  format %{ "CMPu   $op1,$op2" %}
12482 // //  ins_cost(500);
12483 // //  opcode(0x39);  /* Opcode 39 /r */
12484 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12485 // //%}
12486 
// Unsigned int compare against constant zero, emitted as a testl of the
// register with itself.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpU src zero));

  format %{ "testl   $src, $src\t# unsigned" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12497 
// Pointer compare, register-register; produces unsigned flags (rFlagsRegU).
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
12508 
// Pointer compare of a register against a pointer in memory.  Applies only
// when the folded LoadP reports barrier_data() == 0.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  ins_cost(500); // XXX
  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12521 
12522 // // // Cisc-spilled version of cmpP_rReg
12523 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12524 // //%{
12525 // //  match(Set cr (CmpP (LoadP op1) op2));
12526 // //
12527 // //  format %{ "CMPu   $op1,$op2" %}
12528 // //  ins_cost(500);
12529 // //  opcode(0x39);  /* Opcode 39 /r */
12530 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12531 // //%}
12532 
// XXX this is generalized by compP_rReg_mem???
// Raw-pointer compare against memory (used in the out-of-heap check).
// Valid only because non-oop pointers must be raw pointers, and raw pointers
// have no anti-dependencies.  The predicate restricts the rule to loads whose
// address type needs no relocation and whose barrier_data() is 0.  The match
// pattern is the same as compP_rReg_mem, but no explicit ins_cost is given.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);

  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12549 
// Pointer null check on a register, emitted as testq src, src.
// The result is a signed flags register; that should be fine because a
// compare against zero is only ever tested for eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) %{
  match(Set cr (CmpP src zero));

  format %{ "testq   $src, $src\t# ptr" %}
  ins_encode %{ __ testq($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12562 
// Pointer null check directly on memory, emitted as testq with an immediate
// mask.  Applies when compressed oops are off or their base is non-null, and
// the folded LoadP reports barrier_data() == 0.
// The result is a signed flags register; that should be fine because a
// compare against zero is only ever tested for eq/neq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) %{
  match(Set cr (CmpP (LoadP op) zero));
  predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
            n->in(1)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{ __ testq($op$$Address, 0xFFFFFFFF); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12578 
// Pointer null check directly on memory for the compressed-oops, null-base
// configuration: compares R12 with the memory operand.  Per the format
// annotation this relies on R12 (heap base) being zero in that configuration.
// Also requires the folded LoadP to report barrier_data() == 0.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{
  match(Set cr (CmpP (LoadP mem) zero));
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
            n->in(1)->as_Load()->barrier_data() == 0);

  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpq(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12591 
// Compressed-pointer compare, register-register (32-bit cmpl).
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12600 
// Compressed-pointer compare of a register against a narrow oop in memory:
// the LoadN is folded into the memory operand of the cmpl.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  ins_encode %{ __ cmpl($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12611 
// Compressed-pointer compare of a register against a narrow-oop constant,
// emitted through cmp_narrow_oop().
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12621 
// Compressed-pointer compare of a narrow-oop constant against a narrow oop in
// memory, emitted through cmp_narrow_oop() on the memory operand.
// NOTE(review): the emitted compare is (mem, src) while the ideal node is
// (src, mem); this looks safe only if consumers test eq/ne -- confirm.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  ins_encode %{ __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12632 
// Compressed-klass-pointer compare of a register against a narrow-klass
// constant, emitted through cmp_narrow_klass().
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12642 
// Compressed-klass-pointer compare of a narrow-klass constant against a
// narrow klass in memory (LoadNKlass folded), emitted through
// cmp_narrow_klass() on the memory operand.
// NOTE(review): the emitted compare is (mem, src) while the ideal node is
// (src, mem); this looks safe only if consumers test eq/ne -- confirm.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12653 
// Compressed-pointer null check on a register, emitted as testl src, src.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12661 
// Compressed-pointer null check directly on memory, used when the
// compressed-oops base is non-null.
// The flags must say "equal" exactly when the loaded narrow oop is zero.
// This rule previously emitted "cmpl $mem, 0xFFFFFFFF", which reports equal
// only when the value is all-ones and also disagreed with the printed format
// (which claimed a testl).  It now compares against zero using the same
// cmpl(Address, imm32) form, and the format prints what is really emitted.
// A testl with the original mask would be an equally valid encoding.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "cmpl    $mem, 0\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, 0);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12674 
// Compressed-pointer null check directly on memory for the null-base
// configuration: compares R12 with the memory operand.  Per the format
// annotation this relies on R12 (heap base) being zero in that configuration.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  match(Set cr (CmpN (LoadN mem) zero));
  predicate(CompressedOops::base() == NULL);

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpl(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12686 
12687 // Yanked all unsigned pointer compare operations.
12688 // Pointer compares are done with CmpP which is already unsigned.
12689 
// Signed long compare, register-register.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
12700 
// Signed long compare of a register against an immL32 constant.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12711 
// Signed long compare of a register against a long in memory: the LoadL is
// folded into the memory operand of the cmpq.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) %{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12722 
// Long compare against constant zero, emitted as a testq of the register
// with itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  ins_encode %{ __ testq($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12733 
// (src & con) compared with zero for longs, emitted as a single testq with an
// immL32 immediate; the AndL result is not written to any register.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) %{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  ins_encode %{ __ testq($src$$Register, $con$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12744 
// (src1 & src2) compared with zero for longs, emitted as a single
// register-register testq; the AndL result is not written to any register.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero) %{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq   $src1, $src2\t# long" %}
  ins_encode %{ __ testq($src1$$Register, $src2$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12755 
// (src & long-in-memory) compared with zero: the LoadL is folded into the
// memory operand of the testq.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{ __ testq($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12766 
// Same as testL_reg_mem, but the register operand is a pointer reinterpreted
// as a long (CastP2X), so the cast costs no instruction.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{ __ testq($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12777 
// Manifest a CmpU3 (three-way unsigned int compare) result in an integer
// register.  Very painful.  This is the test to avoid.
// dst is preloaded with -1 and kept if src1 is below src2; otherwise setne
// plus movzbl produce 0 when the operands are equal and 1 when they differ.
// The flags from the initial cmpl are consumed after the movl, and the flags
// register is declared killed.
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // The annotation names the node actually matched (it used to say CmpL3).
  format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
12803 
// Manifest a CmpL3 (three-way signed long compare) result in an integer
// register.  Very painful.  This is the test to avoid.
// dst is preloaded with -1 and kept if src1 is less than src2; otherwise
// setne plus movzbl produce 0 when the operands are equal and 1 when they
// differ.  The flags register is declared killed.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags) %{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label is_less;  // reached with dst == -1 still in place
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, is_less);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(is_less);
  %}
  ins_pipe(pipe_slow);
%}
12829 
// Manifest a CmpUL3 (three-way unsigned long compare) result in an integer
// register.  Very painful.  This is the test to avoid.
// dst is preloaded with -1 and kept if src1 is below src2; otherwise setne
// plus movzbl produce 0 when the operands are equal and 1 when they differ.
// The flags from the initial cmpq are consumed after the movl, and the flags
// register is declared killed.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // The annotation names the node actually matched (it used to say CmpL3).
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
12855 
12856 // Unsigned long compare Instructions; really, same as signed long except they
12857 // produce an rFlagsRegU instead of rFlagsReg.
// compUL_rReg: unsigned long compare, register-register form.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
12868 
// Unsigned long compare of a register against an immL32 constant.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12879 
// Unsigned long compare of a register against a long in memory: the LoadL is
// folded into the memory operand of the cmpq.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2) %{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
12890 
// Unsigned long compare against constant zero, emitted as a testq of the
// register with itself.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  ins_encode %{ __ testq($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12901 
// Int compare of a signed byte loaded from memory against an immI8 constant,
// emitted as a single cmpb on the memory operand (LoadB folded).
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm) %{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb    $mem, $imm" %}
  ins_encode %{
    __ cmpb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12911 
// (unsigned byte in memory & immU7 constant) compared with zero, emitted as a
// single testb on the memory operand (LoadUB folded).
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero) %{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# ubyte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12921 
// (signed byte in memory & immI8 constant) compared with zero, emitted as a
// single testb on the memory operand (LoadB folded).
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero) %{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# byte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12931 
12932 //----------Max and Min--------------------------------------------------------
12933 // Min Instructions
12934 
// Helper with no match rule of its own; it is instantiated by the minI_rReg
// expand rule.  Moves src into dst when the incoming flags say "greater".
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr) %{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{ __ cmovl(Assembler::greater, $dst$$Register, $src$$Register); %}
  ins_pipe(pipe_cmov_reg);
%}
12945 
12946 
// Int minimum, computed in place in dst.  Expands into two instructions:
// compI_rReg compares dst with src, then cmovI_reg_g replaces dst by src when
// dst was greater.
instruct minI_rReg(rRegI dst, rRegI src)
%{
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;               // flags produced by the compare, used by the cmov
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
12958 
// Helper with no match rule of its own; it is instantiated by the maxI_rReg
// expand rule.  Moves src into dst when the incoming flags say "less".
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr) %{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{ __ cmovl(Assembler::less, $dst$$Register, $src$$Register); %}
  ins_pipe(pipe_cmov_reg);
%}
12969 
12970 
// Int maximum, computed in place in dst.  Expands into two instructions:
// compI_rReg compares dst with src, then cmovI_reg_l replaces dst by src when
// dst was less.
instruct maxI_rReg(rRegI dst, rRegI src)
%{
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;               // flags produced by the compare, used by the cmov
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
12982 
12983 // ============================================================================
12984 // Branch Instructions
12985 
// Jump Direct - Label defines a relative address from JMP+1.
// Long form of the unconditional branch: declared as exactly 5 bytes, and the
// encoding passes false as jmp()'s second argument to force the long jump.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp     $labl" %}
  size(5);
  ins_encode %{
    Label* target = $labl$$label;
    __ jmp(*target, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}
13001 
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Long form of the signed conditional branch for If nodes: declared as
// exactly 6 bytes, with the condition taken from the cmpOp operand.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13017 
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Long form of the conditional branch that closes a counted loop
// (CountedLoopEnd): declared as exactly 6 bytes.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13033 
// Jump Direct Conditional - using unsigned comparison.
// Long form (declared as exactly 6 bytes) for If nodes whose flags come from
// an unsigned compare (rFlagsRegU) and whose condition is a cmpOpU.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13048 
// Long-form conditional branch (declared as exactly 6 bytes) for If nodes
// over rFlagsRegUCF flags with a cmpOpUCF condition.  Same encoding as
// jmpConU, but with a lower ins_cost (200 instead of 300).
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
13062 
// Conditional branch over rFlagsRegUCF flags for cmpOpUCF2 conditions, which
// the encoding expects to be either notEqual or equal.  Two branches are
// emitted because the parity flag must be examined as well:
//   notEqual: branch to the label on parity OR on notEqual;
//   equal:    skip the branch on parity, otherwise branch on equal.
// NOTE(review): parity presumably signals an unordered floating-point
// compare -- confirm against the producers of rFlagsRegUCF.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u    $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u    done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Parity or notEqual both take the branch.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Parity falls through to 'done'; only a non-parity equal branches.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
       // No other condition code is handled by this rule.
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
13094 
13095 // ============================================================================
13096 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
13097 // superklass array for an instance of the superklass.  Set a hidden
13098 // internal cache on a hit (cache is checked with exposed code in
13099 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13100 // encoding ALSO sets flags.
13101 
// Value-producing form of the partial subtype check: the result register is
// an rdi_RegP, sub/super are rsi_RegP/rax_RegP operands, and rcx and the
// flags are killed.  The emitted sequence is produced by the
// enc_PartialSubtypeCheck encoding class with opcode 0x1, which (per the
// opcode comment below) forces the XOR of RDI on a hit.  The format text
// describes that sequence for debug output only.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  // The xorq line now carries the '#' comment marker like every other
  // annotated line of this listing.
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
    "miss:\t" %}

  opcode(0x1); // Force a XOR of RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
13123 
// Flags-producing form of the partial subtype check: matches the check
// compared against a null pointer constant, so only the flags are the result.
// rcx and the rdi_RegP 'result' operand are killed.  Uses the same
// enc_PartialSubtypeCheck encoding class as partialSubtypeCheck, but with
// opcode 0x0, and has a lower ins_cost (1000 vs 1100).
instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
                                     rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                                     immP0 zero,
                                     rdi_RegP result)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL rcx, KILL result);

  ins_cost(1000);
  // Debug listing of the emitted sequence; it has no final xorq because the
  // consumer only looks at the flags.
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
            "jne,s   miss\t\t# Missed: flags nz\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
    "miss:\t" %}

  opcode(0x0); // No need to XOR RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
13145 
13146 // ============================================================================
13147 // Branch Instructions -- short offset versions
13148 //
13149 // These instructions are used to replace jumps of a long offset (the default
13150 // match) with jumps of a shorter offset.  These instructions are all tagged
13151 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13152 // match rules in general matching.  Instead, the ADLC generates a conversion
13153 // method in the MachNode which can be used to do in-place replacement of the
13154 // long variant with the shorter variant.  The compiler will determine if a
13155 // branch can be taken by the is_short_branch_offset() predicate in the machine
13156 // specific code section of the file.
13157 
13158 // Unconditional jump, short form - the label is a relative address from JMP+1
13159 instruct jmpDir_short(label labl) %{
13160   match(Goto);
13161   effect(USE labl);
13162
13163   ins_cost(300);
13164   format %{ "jmp,s   $labl" %}
        // Fixed two-byte encoding; tagged as the short-branch variant below.
13165   size(2);
13166   ins_encode %{
13167     Label* target = $labl$$label;
13168     __ jmpb(*target);
13169   %}
13170   ins_pipe(pipe_jmp);
13171   ins_short_branch(1);
13172 %}
13173 
13174 // Conditional jump, short form - the label is a relative address from Jcc+1
13175 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
13176   match(If cop cr);
13177   effect(USE labl);
13178
13179   ins_cost(300);
13180   format %{ "j$cop,s   $labl" %}
        // Fixed two-byte encoding; tagged as the short-branch variant below.
13181   size(2);
13182   ins_encode %{
          // Condition comes from the cop operand, destination from labl.
13183     Label* target = $labl$$label;
          const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
13184     __ jccb(cond, *target);
13185   %}
13186   ins_pipe(pipe_jcc);
13187   ins_short_branch(1);
13188 %}
13189 
13190 // Conditional jump closing a counted loop, short form - the label is a
      // relative address from Jcc+1
13191 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
13192   match(CountedLoopEnd cop cr);
13193   effect(USE labl);
13194
13195   ins_cost(300);
13196   format %{ "j$cop,s   $labl\t# loop end" %}
        // Fixed two-byte encoding; tagged as the short-branch variant below.
13197   size(2);
13198   ins_encode %{
          // Condition comes from the cop operand, destination from labl.
13199     Label* target = $labl$$label;
          const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
13200     __ jccb(cond, *target);
13201   %}
13202   ins_pipe(pipe_jcc);
13203   ins_short_branch(1);
13204 %}
13205 
13206 // Conditional jump on an unsigned comparison, short form
13207 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13208   match(If cop cmp);
13209   effect(USE labl);
13210
13211   ins_cost(300);
13212   format %{ "j$cop,us  $labl" %}
        // Fixed two-byte encoding; tagged as the short-branch variant below.
13213   size(2);
13214   ins_encode %{
          // Condition comes from the unsigned cop operand, destination from labl.
13215     Label* target = $labl$$label;
          const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
13216     __ jccb(cond, *target);
13217   %}
13218   ins_pipe(pipe_jcc);
13219   ins_short_branch(1);
13220 %}
13221 
13222 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short-form conditional jump for the cmpOpUCF / rFlagsRegUCF operand pair;
        // a single jcc on the operand's condition code is emitted.
13223   match(If cop cmp);
13224   effect(USE labl);
13225
13226   ins_cost(300);
13227   format %{ "j$cop,us  $labl" %}
        // Fixed two-byte encoding; tagged as the short-branch variant below.
13228   size(2);
13229   ins_encode %{
13230     Label* target = $labl$$label;
          const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
13231     __ jccb(cond, *target);
13232   %}
13233   ins_pipe(pipe_jcc);
13234   ins_short_branch(1);
13235 %}
13236 
13237 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short-form conditional jump that needs two jcc instructions because the
        // parity flag has to be consulted as well (presumably parity signals an
        // unordered floating-point compare - confirm against cmpOpUCF2).
13238   match(If cop cmp);
13239   effect(USE labl);
13240
13241   ins_cost(300);
13242   format %{ $$template
13243     if ($cop$$cmpcode == Assembler::notEqual) {
13244       $$emit$$"jp,u,s  $labl\n\t"
13245       $$emit$$"j$cop,u,s  $labl"
13246     } else {
13247       $$emit$$"jp,u,s  done\n\t"
13248       $$emit$$"j$cop,u,s  $labl\n\t"
13249       $$emit$$"done:"
13250     }
13251   %}
        // Two short jumps of two bytes each.
13252   size(4);
13253   ins_encode %{
13254     Label* target = $labl$$label;
          switch ($cop$$cmpcode) {
            case Assembler::notEqual:
              // Take the branch when either parity or not-equal holds.
13256         __ jccb(Assembler::parity, *target);
13257         __ jccb(Assembler::notEqual, *target);
              break;
            case Assembler::equal: {
              // Parity set skips the equality branch entirely.
13259         Label skip;
13260         __ jccb(Assembler::parity, skip);
13261         __ jccb(Assembler::equal, *target);
13262         __ bind(skip);
              break;
            }
            default:
              // Only equal / notEqual are expected for this operand.
13264         ShouldNotReachHere();
          }
13266   %}
13267   ins_pipe(pipe_jcc);
13268   ins_short_branch(1);
13269 %}
13270 
13271 // ============================================================================
13272 // inlined locking and unlocking
13273 
13274 instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
        // Inlined monitor enter, selected only when the current compilation uses RTM.
        // The outcome is reported through the flags register; box is consumed and
        // clobbered, the remaining registers are temporaries.
13275   predicate(Compile::current()->use_rtm());
13276   match(Set cr (FastLock object box));
13277   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13278   ins_cost(300);
13279   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13280   ins_encode %{
          // Pull the compilation and the method being compiled out once, then pass
          // the method's profile data and the RTM counters to the shared helper.
          Compile* comp = ra_->C;
          Method* method = (Method*)(comp->method()->constant_encoding());
13281     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13282                  $scr$$Register, $cx1$$Register, $cx2$$Register, r15_thread,
13283                  _rtm_counters, _stack_rtm_counters,
13284                  method->method_data(),
13285                  true, comp->profile_rtm());
13286   %}
13287   ins_pipe(pipe_slow);
13288 %}
13289 
13290 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
        // Inlined monitor enter for compilations that do not use RTM. The outcome
        // is reported through the flags register; box is consumed and clobbered.
13291   predicate(!Compile::current()->use_rtm());
13292   match(Set cr (FastLock object box));
13293   effect(TEMP tmp, TEMP scr, USE_KILL box);
13294   ins_cost(300);
13295   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
13296   ins_encode %{
          Register obj_reg = $object$$Register;
          Register box_reg = $box$$Register;
          Register tmp_reg = $tmp$$Register;
          Register scr_reg = $scr$$Register;
          // No extra scratch registers, RTM counters or profile data on this path.
13297     __ fast_lock(obj_reg, box_reg, tmp_reg, scr_reg,
13298                  noreg, noreg, r15_thread, nullptr, nullptr, nullptr, false, false);
13299   %}
13300   ins_pipe(pipe_slow);
13301 %}
13302 
13303 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
        // Inlined monitor exit; the outcome is reported through the flags register.
        // A single rule serves both modes: the RTM setting is passed as an argument.
13304   match(Set cr (FastUnlock object box));
13305   effect(TEMP tmp, USE_KILL box);
13306   ins_cost(300);
13307   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
13308   ins_encode %{
          Register obj_reg = $object$$Register;
          Register box_reg = $box$$Register;
          Register tmp_reg = $tmp$$Register;
13309     __ fast_unlock(obj_reg, box_reg, tmp_reg, ra_->C->use_rtm());
13310   %}
13311   ins_pipe(pipe_slow);
13312 %}
13313 
13314 
13315 // ============================================================================
13316 // Safepoint Instructions
13317 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
13318 %{
        // Safepoint poll: a test instruction that reads the word addressed by the
        // 'poll' register. Only the flags are modified (KILL cr).
13319   match(SafePoint poll);
13320   effect(KILL cr, USE poll);
13321
13322   format %{ "testl   rax, [$poll]\t"
13323             "# Safepoint: poll for GC" %}
13324   ins_cost(125);
13325   size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13326   ins_encode %{
          // Attach the poll_type relocation to the instruction emitted next.
13327     __ relocate(relocInfo::poll_type);
          // Remember where the poll starts so the assert below can inspect it.
13328     address pre_pc = __ pc();
13329     __ testl(rax, Address($poll$$Register, 0));
          // Debug check that the emitted bytes are recognised as a safepoint poll.
13330     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
13331   %}
13332   ins_pipe(ialu_reg_mem);
13333 %}
13334 
13335 instruct mask_all_evexL(kReg dst, rRegL src) %{
        // MaskAll from a 64-bit source register into an opmask (k) register.
13336   match(Set dst (MaskAll src));
13337   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
13338   ins_encode %{
          // The vector length of this node is handed to the helper as the mask length.
13339     const int lane_count = Matcher::vector_length(this);
13340     __ vector_maskall_operation($dst$$KRegister, $src$$Register, lane_count);
13341   %}
13342   ins_pipe( pipe_slow );
13343 %}
13344 
13345 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
        // MaskAll from a 32-bit source when the vector has more than 32 lanes:
        // the source is first sign-extended into a 64-bit temporary.
13346   predicate(Matcher::vector_length(n) > 32);
13347   match(Set dst (MaskAll src));
13348   effect(TEMP tmp);
13349   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
13350   ins_encode %{
13351     const int lane_count = Matcher::vector_length(this);
          Register wide_src = $tmp$$Register;
13352     __ movslq(wide_src, $src$$Register);
13353     __ vector_maskall_operation($dst$$KRegister, wide_src, lane_count);
13354   %}
13355   ins_pipe( pipe_slow );
13356 %}
13357 
13358 // ============================================================================
13359 // Procedure Call/Return Instructions
13360 // Call Java Static Instruction
13361 // Note: If this code changes, the corresponding ret_addr_offset() and
13362 //       compute_padding() functions will have to be adjusted.
13363 instruct CallStaticJavaDirect(method meth) %{
13364   match(CallStaticJava);
13365   effect(USE meth);
13366
13367   ins_cost(300);
13368   format %{ "call,static " %}
13369   opcode(0xE8); /* E8 cd */
        // Encoding runs three encode classes in order: clear_avx, the static call
        // itself, then call_epilog.
13370   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
13371   ins_pipe(pipe_slow);
        // Alignment requirement for this instruction; compute_padding() (see the
        // note above) has to agree with it.
13372   ins_alignment(4);
13373 %}
13374 
13375 // Call Java Dynamic Instruction
13376 // Note: If this code changes, the corresponding ret_addr_offset() and
13377 //       compute_padding() functions will have to be adjusted.
13378 instruct CallDynamicJavaDirect(method meth)
13379 %{
13380   match(CallDynamicJava);
13381   effect(USE meth);
13382
13383   ins_cost(300);
        // Per the format text, rax is loaded with Universe::non_oop_word() before
        // the call instruction.
13384   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
13385             "call,dynamic " %}
        // Encoding runs three encode classes in order: clear_avx, the dynamic call
        // sequence, then call_epilog.
13386   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
13387   ins_pipe(pipe_slow);
13388   ins_alignment(4);
13389 %}
13390 
13391 // Call Runtime Instruction
13392 instruct CallRuntimeDirect(method meth)
13393 %{
13394   match(CallRuntime);
13395   effect(USE meth);
13396
13397   ins_cost(300);
13398   format %{ "call,runtime " %}
        // clear_avx first, then the shared Java_To_Runtime call encoding; unlike the
        // Java call rules there is no call_epilog and no alignment requirement here.
13399   ins_encode(clear_avx, Java_To_Runtime(meth));
13400   ins_pipe(pipe_slow);
13401 %}
13402 
13403 // Call runtime without safepoint
13404 instruct CallLeafDirect(method meth)
13405 %{
13406   match(CallLeaf);
13407   effect(USE meth);
13408
13409   ins_cost(300);
13410   format %{ "call_leaf,runtime " %}
        // Same encoding as CallRuntimeDirect: clear_avx followed by Java_To_Runtime.
13411   ins_encode(clear_avx, Java_To_Runtime(meth));
13412   ins_pipe(pipe_slow);
13413 %}
13414 
13415 // Call runtime without safepoint and with vector arguments
13416 instruct CallLeafDirectVector(method meth)
13417 %{
13418   match(CallLeafVector);
13419   effect(USE meth);
13420
13421   ins_cost(300);
13422   format %{ "call_leaf,vector " %}
        // NOTE(review): this is the only runtime-call rule here that omits clear_avx -
        // presumably so the vector argument registers are left untouched; confirm.
13423   ins_encode(Java_To_Runtime(meth));
13424   ins_pipe(pipe_slow);
13425 %}
13426 
13427 // Call runtime without safepoint (matches the CallLeafNoFP ideal node)
13428 instruct CallLeafNoFPDirect(method meth)
13429 %{
13430   match(CallLeafNoFP);
13431   effect(USE meth);
13432
13433   ins_cost(300);
13434   format %{ "call_leaf_nofp,runtime " %}
        // Same encoding as CallLeafDirect: clear_avx followed by Java_To_Runtime.
13435   ins_encode(clear_avx, Java_To_Runtime(meth));
13436   ins_pipe(pipe_slow);
13437 %}
13438 
13439 // Return Instruction
13440 // Remove the return address & jump to it.
13441 // NOTE(review): an earlier note here said a nop is always emitted after the
13442 // ret to leave room for safepoint patching, but the encoding below emits only
      // ret(0) - confirm whether that note is stale.
13443 instruct Ret()
13444 %{
13445   match(Return);
13446
13447   format %{ "ret" %}
13448   ins_encode %{
          // Immediate operand of 0 is passed to the assembler's ret.
13449     __ ret(0);
13450   %}
13451   ins_pipe(pipe_jmp);
13452 %}
13453 
13454 // Tail Call ('interprocedural jump'): jump from a runtime stub into Java code.
13455 // The return address is left in place, so the jump target will eventually
13456 // return to our caller. (TailJump below is the variant that removes the
13457 // return address.)
13458 // 'jump_target' must not be rbp: a MachEpilogNode has already been emitted
13459 // just above the TailCall and has reset rbp to the caller state.
13460 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
13461 %{
13462   match(TailCall jump_target method_ptr);
13463
13464   ins_cost(300);
13465   format %{ "jmp     $jump_target\t# rbx holds method" %}
13466   ins_encode %{
          // method_ptr is pinned to rbx by its operand class; only the jump is emitted.
          Register target = $jump_target$$Register;
13467     __ jmp(target);
13468   %}
13469   ins_pipe(pipe_jmp);
13470 %}
13471 
13472 // Tail Jump: discard the return address, then jump to the target.
13473 // (TailCall above is the variant that leaves the return address around.)
13474 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
13475 %{
13476   match(TailJump jump_target ex_oop);
13477
13478   ins_cost(300);
13479   format %{ "popq    rdx\t# pop return address\n\t"
13480             "jmp     $jump_target" %}
13481   ins_encode %{
          // The popped return address lands in rdx and is not used afterwards.
          Register discarded_ret_addr = as_Register(RDX_enc);
          Register target = $jump_target$$Register;
13482     __ popq(discarded_ret_addr);
13483     __ jmp(target);
13484   %}
13485   ins_pipe(pipe_jmp);
13486 %}
13487 
13488 // Create exception oop: created by stack-crawling runtime code.
13489 // Created exception is now available to this handler, and is setup
13490 // just prior to jumping to this handler.  No code emitted.
13491 instruct CreateException(rax_RegP ex_oop)
13492 %{
        // Defines ex_oop (pinned to rax by its operand class) as the CreateEx value.
13493   match(Set ex_oop (CreateEx));
13494
        // Zero bytes are emitted: the encoding below is empty.
13495   size(0);
13496   // use the following format syntax
13497   format %{ "# exception oop is in rax; no code emitted" %}
13498   ins_encode();
13499   ins_pipe(empty);
13500 %}
13501 
13502 // Rethrow exception:
13503 // The exception oop will come in the first argument position.
13504 // Then JUMP (not call) to the rethrow stub code.
13505 instruct RethrowException()
13506 %{
13507   match(Rethrow);
13508
13509   // use the following format syntax
13510   format %{ "jmp     rethrow_stub" %}
        // The jump itself is produced by the shared enc_rethrow encode class.
13511   ins_encode(enc_rethrow);
13512   ins_pipe(pipe_jmp);
13513 %}
13514 
13515 // ============================================================================
13516 // This name is KNOWN by the ADLC and cannot be changed.
13517 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13518 // for this guy.
13519 instruct tlsLoadP(r15_RegP dst) %{
        // ThreadLocal is satisfied by r15 itself (dst is pinned to r15 by its operand
        // class), so this rule only defines dst and emits no bytes.
13520   match(Set dst (ThreadLocal));
13521   effect(DEF dst);
13522
13523   size(0);
13524   format %{ "# TLS is in R15" %}
13525   ins_encode( /*empty encoding*/ );
13526   ins_pipe(ialu_reg_reg);
13527 %}
13528 
13529 
13530 //----------PEEPHOLE RULES-----------------------------------------------------
13531 // These must follow all instruction definitions as they use the names
13532 // defined in the instructions definitions.
13533 //
13534 // peeppredicate ( rule_predicate );
13535 // // the peephole rule is ignored unless this predicate holds
13536 //
13537 // peepmatch ( root_instr_name [preceding_instruction]* );
13538 //
13539 // peepprocedure ( procedure_name );
13540 // // provide the name of a procedure that performs the optimization; the
13541 // // procedure should reside in the architecture dependent peephole file and
13542 // // has the signature MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
13543 // // with the arguments being the basic block, the current node index inside the
13544 // // block, the register allocator, a function that, when invoked, returns a new
13545 // // node of the kind defined in peepreplace, and the rules of the nodes appearing
13546 // // in the corresponding peepmatch; the function returns true if successful,
13547 // // else it returns false
13548 //
13549 // peepconstraint %{
13550 // (instruction_number.operand_name relational_op instruction_number.operand_name
13551 //  [, ...] );
13552 // // instruction numbers are zero-based using left to right order in peepmatch
13553 //
13554 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13555 // // provide an instruction_number.operand_name for each operand that appears
13556 // // in the replacement instruction's match rule
13557 //
13558 // ---------VM FLAGS---------------------------------------------------------
13559 //
13560 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13561 //
13562 // Each peephole rule is given an identifying number starting with zero and
13563 // increasing by one in the order seen by the parser.  An individual peephole
13564 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13565 // on the command-line.
13566 //
13567 // ---------CURRENT LIMITATIONS----------------------------------------------
13568 //
13569 // Only transformations inside a basic block (do we need more for peephole)
13570 //
13571 // ---------EXAMPLE----------------------------------------------------------
13572 //
13573 // // pertinent parts of existing instructions in architecture description
13574 // instruct movI(rRegI dst, rRegI src)
13575 // %{
13576 //   match(Set dst (CopyI src));
13577 // %}
13578 //
13579 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
13580 // %{
13581 //   match(Set dst (AddI dst src));
13582 //   effect(KILL cr);
13583 // %}
13584 //
13585 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
13586 // %{
13587 //   match(Set dst (AddI dst src));
13588 // %}
13589 //
13590 // 1. Simple replacement
13591 // - Only match adjacent instructions in same basic block
13592 // - Only equality constraints
13593 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
13594 // - Only one replacement instruction
13595 //
13596 // // Change (inc mov) to lea
13597 // peephole %{
13598 //   // lea should only be emitted when beneficial
13599 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13600 //   // increment preceded by register-register move
13601 //   peepmatch ( incI_rReg movI );
13602 //   // require that the destination register of the increment
13603 //   // match the destination register of the move
13604 //   peepconstraint ( 0.dst == 1.dst );
13605 //   // construct a replacement instruction that sets
13606 //   // the destination to ( move's source register + one )
13607 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13608 // %}
13609 //
13610 // 2. Procedural replacement
13611 // - More flexible finding of relevant nodes
13612 // - More flexible constraints
13613 // - More flexible transformations
13614 // - May utilise architecture-dependent API more effectively
13615 // - Currently only one replacement instruction due to adlc parsing capabilities
13616 //
13617 // // Change (inc mov) to lea
13618 // peephole %{
13619 //   // lea should only be emitted when beneficial
13620 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13621 //   // the rule numbers of these nodes inside are passed into the function below
13622 //   peepmatch ( incI_rReg movI );
13623 //   // the method that takes the responsibility of transformation
13624 //   peepprocedure ( inc_mov_to_lea );
13625 //   // the replacement is a leaI_rReg_immI; a lambda that creates this node
13626 //   // when invoked is passed into the function above
13627 //   peepreplace ( leaI_rReg_immI() );
13628 // %}
13629 
13630 // These instructions are not matched by the matcher but are used by the peephole rules
13631 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
13632 %{
        // 32-bit lea computing dst = src1 + src2. predicate(false) keeps the matcher
        // from ever selecting this rule; it exists as a peephole replacement only.
13633   predicate(false);
13634   match(Set dst (AddI src1 src2));
13635   format %{ "leal    $dst, [$src1 + $src2]" %}
13636   ins_encode %{
13637     Register dst = $dst$$Register;
13638     Register src1 = $src1$$Register;
13639     Register src2 = $src2$$Register;
          // Keep rbp/r13 out of the base slot: with a SIB byte those registers cannot
          // be encoded as a base without a displacement. If src1 is one of them, swap
          // the roles so that it becomes the index instead.
13640     if (src1 != rbp && src1 != r13) {
13641       __ leal(dst, Address(src1, src2, Address::times_1));
13642     } else {
            // The swap only helps when src2 is not rbp/r13 as well.
13643       assert(src2 != rbp && src2 != r13, "lea: neither source can be encoded as a base without a displacement");
13644       __ leal(dst, Address(src2, src1, Address::times_1));
13645     }
13646   %}
13647   ins_pipe(ialu_reg_reg);
13648 %}
13649 
13650 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
13651 %{
        // 32-bit lea computing dst = src1 + constant. predicate(false) keeps the
        // matcher from selecting this rule; it is a peephole replacement only.
13652   predicate(false);
13653   match(Set dst (AddI src1 src2));
13654   format %{ "leal    $dst, [$src1 + $src2]" %}
13655   ins_encode %{
          // The immediate becomes the displacement of a base-only address.
          Register result = $dst$$Register;
          Register base = $src1$$Register;
13656     __ leal(result, Address(base, $src2$$constant));
13657   %}
13658   ins_pipe(ialu_reg_reg);
13659 %}
13660 
13661 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
13662 %{
        // 32-bit lea computing dst = src << shift. predicate(false) keeps the
        // matcher from selecting this rule; it is a peephole replacement only.
13663   predicate(false);
13664   match(Set dst (LShiftI src shift));
13665   format %{ "leal    $dst, [$src << $shift]" %}
13666   ins_encode %{
          // The shift count is used directly as the address scale factor.
13667     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
13668     Register src = $src$$Register;
          Register result = $dst$$Register;
          if (scale != Address::times_2 || src == rbp || src == r13) {
            // General case: index-only address with the requested scale.
13672       __ leal(result, Address(noreg, src, scale));
13671     } else {
            // Doubling a register that may serve as a base: use [src + src].
13670       __ leal(result, Address(src, src, Address::times_1));
13673     }
13674   %}
13675   ins_pipe(ialu_reg_reg);
13676 %}
13677 
13678 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
13679 %{
        // 64-bit lea computing dst = src1 + src2. predicate(false) keeps the matcher
        // from ever selecting this rule; it exists as a peephole replacement only.
13680   predicate(false);
13681   match(Set dst (AddL src1 src2));
13682   format %{ "leaq    $dst, [$src1 + $src2]" %}
13683   ins_encode %{
13684     Register dst = $dst$$Register;
13685     Register src1 = $src1$$Register;
13686     Register src2 = $src2$$Register;
          // Keep rbp/r13 out of the base slot: with a SIB byte those registers cannot
          // be encoded as a base without a displacement. If src1 is one of them, swap
          // the roles so that it becomes the index instead.
13687     if (src1 != rbp && src1 != r13) {
13688       __ leaq(dst, Address(src1, src2, Address::times_1));
13689     } else {
            // The swap only helps when src2 is not rbp/r13 as well.
13690       assert(src2 != rbp && src2 != r13, "lea: neither source can be encoded as a base without a displacement");
13691       __ leaq(dst, Address(src2, src1, Address::times_1));
13692     }
13693   %}
13694   ins_pipe(ialu_reg_reg);
13695 %}
13696 
13697 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
13698 %{
        // 64-bit lea computing dst = src1 + constant. predicate(false) keeps the
        // matcher from selecting this rule; it is a peephole replacement only.
13699   predicate(false);
13700   match(Set dst (AddL src1 src2));
13701   format %{ "leaq    $dst, [$src1 + $src2]" %}
13702   ins_encode %{
          // The immediate becomes the displacement of a base-only address.
          Register result = $dst$$Register;
          Register base = $src1$$Register;
13703     __ leaq(result, Address(base, $src2$$constant));
13704   %}
13705   ins_pipe(ialu_reg_reg);
13706 %}
13707 
13708 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
13709 %{
        // 64-bit lea computing dst = src << shift. predicate(false) keeps the
        // matcher from selecting this rule; it is a peephole replacement only.
13710   predicate(false);
13711   match(Set dst (LShiftL src shift));
13712   format %{ "leaq    $dst, [$src << $shift]" %}
13713   ins_encode %{
          // The shift count is used directly as the address scale factor.
13714     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
13715     Register src = $src$$Register;
          Register result = $dst$$Register;
          if (scale != Address::times_2 || src == rbp || src == r13) {
            // General case: index-only address with the requested scale.
13719       __ leaq(result, Address(noreg, src, scale));
13718     } else {
            // Doubling a register that may serve as a base: use [src + src].
13717       __ leaq(result, Address(src, src, Address::times_1));
13720     }
13721   %}
13722   ins_pipe(ialu_reg_reg);
13723 %}
13724 
13725 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
13726 // sal}) with lea instructions. The {add, sal} rules are beneficial in
13727 // processors with at least partial ALU support for lea
13728 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
13729 // beneficial for processors with full ALU support
13730 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
13731 
13732 peephole
13733 %{
        // int add (reg, reg): rooted at addI_rReg, handled by lea_coalesce_reg, with
        // leaI_rReg_rReg_peep as the replacement node. Enabled only when
        // VM_Version::supports_fast_2op_lea() holds.
13734   peeppredicate(VM_Version::supports_fast_2op_lea());
13735   peepmatch (addI_rReg);
13736   peepprocedure (lea_coalesce_reg);
13737   peepreplace (leaI_rReg_rReg_peep());
13738 %}
13739 
13740 peephole
13741 %{
        // int add (reg, imm): rooted at addI_rReg_imm, handled by lea_coalesce_imm,
        // with leaI_rReg_immI_peep as the replacement node. Enabled only when
        // VM_Version::supports_fast_2op_lea() holds.
13742   peeppredicate(VM_Version::supports_fast_2op_lea());
13743   peepmatch (addI_rReg_imm);
13744   peepprocedure (lea_coalesce_imm);
13745   peepreplace (leaI_rReg_immI_peep());
13746 %}
13747 
13748 peephole
13749 %{
        // int increment: rooted at incI_rReg, handled by lea_coalesce_imm, with
        // leaI_rReg_immI_peep as the replacement node. Uses the stricter predicate:
        // fast 3-operand lea support, or an Intel Cascade Lake CPU.
13750   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13751                 VM_Version::is_intel_cascade_lake());
13752   peepmatch (incI_rReg);
13753   peepprocedure (lea_coalesce_imm);
13754   peepreplace (leaI_rReg_immI_peep());
13755 %}
13756 
13757 peephole
13758 %{
        // int decrement: rooted at decI_rReg, handled by lea_coalesce_imm, with
        // leaI_rReg_immI_peep as the replacement node. Uses the stricter predicate:
        // fast 3-operand lea support, or an Intel Cascade Lake CPU.
13759   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13760                 VM_Version::is_intel_cascade_lake());
13761   peepmatch (decI_rReg);
13762   peepprocedure (lea_coalesce_imm);
13763   peepreplace (leaI_rReg_immI_peep());
13764 %}
13765 
13766 peephole
13767 %{
        // int left shift by a small immediate: rooted at salI_rReg_immI2, handled by
        // lea_coalesce_imm, with leaI_rReg_immI2_peep as the replacement node.
        // Enabled only when VM_Version::supports_fast_2op_lea() holds.
13768   peeppredicate(VM_Version::supports_fast_2op_lea());
13769   peepmatch (salI_rReg_immI2);
13770   peepprocedure (lea_coalesce_imm);
13771   peepreplace (leaI_rReg_immI2_peep());
13772 %}
13773 
13774 peephole
13775 %{
        // long add (reg, reg): rooted at addL_rReg, handled by lea_coalesce_reg, with
        // leaL_rReg_rReg_peep as the replacement node. Enabled only when
        // VM_Version::supports_fast_2op_lea() holds.
13776   peeppredicate(VM_Version::supports_fast_2op_lea());
13777   peepmatch (addL_rReg);
13778   peepprocedure (lea_coalesce_reg);
13779   peepreplace (leaL_rReg_rReg_peep());
13780 %}
13781 
13782 peephole
13783 %{
        // long add (reg, imm): rooted at addL_rReg_imm, handled by lea_coalesce_imm,
        // with leaL_rReg_immL32_peep as the replacement node. Enabled only when
        // VM_Version::supports_fast_2op_lea() holds.
13784   peeppredicate(VM_Version::supports_fast_2op_lea());
13785   peepmatch (addL_rReg_imm);
13786   peepprocedure (lea_coalesce_imm);
13787   peepreplace (leaL_rReg_immL32_peep());
13788 %}
13789 
13790 peephole
13791 %{
        // long increment: rooted at incL_rReg, handled by lea_coalesce_imm, with
        // leaL_rReg_immL32_peep as the replacement node. Uses the stricter predicate:
        // fast 3-operand lea support, or an Intel Cascade Lake CPU.
13792   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13793                 VM_Version::is_intel_cascade_lake());
13794   peepmatch (incL_rReg);
13795   peepprocedure (lea_coalesce_imm);
13796   peepreplace (leaL_rReg_immL32_peep());
13797 %}
13798 
13799 peephole
13800 %{
        // long decrement: rooted at decL_rReg, handled by lea_coalesce_imm, with
        // leaL_rReg_immL32_peep as the replacement node. Uses the stricter predicate:
        // fast 3-operand lea support, or an Intel Cascade Lake CPU.
13801   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13802                 VM_Version::is_intel_cascade_lake());
13803   peepmatch (decL_rReg);
13804   peepprocedure (lea_coalesce_imm);
13805   peepreplace (leaL_rReg_immL32_peep());
13806 %}
13807 
13808 peephole
13809 %{
        // long left shift by a small immediate: rooted at salL_rReg_immI2, handled by
        // lea_coalesce_imm, with leaL_rReg_immI2_peep as the replacement node.
        // Enabled only when VM_Version::supports_fast_2op_lea() holds.
13810   peeppredicate(VM_Version::supports_fast_2op_lea());
13811   peepmatch (salL_rReg_immI2);
13812   peepprocedure (lea_coalesce_imm);
13813   peepreplace (leaL_rReg_immI2_peep());
13814 %}
13815 
13816 //----------SMARTSPILL RULES---------------------------------------------------
13817 // These must follow all instruction definitions as they use the names
13818 // defined in the instructions definitions.