1 //
    2 // Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
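// History: RBX, RSI, and RDI were once save-on-entry (SOE) for Java code.
// SOE was then turned off for Java code because of the frequent use of
// uncommon traps, and later turned back on for RSI and RDI once the
// allocator improved.
// NOTE: the reg_def entries below list RSI and RDI as SOC for Java code, so
// the last sentence no longer matches the definitions.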
   69 
   70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
   71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
   72 
   73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
   74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
   75 
   76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
   77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
   78 
   79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
   80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
   81 
   82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
   83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
   84 
   85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
   86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
   87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
   89 #ifdef _WIN64
   90 
   91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
   92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
   93 
   94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
   95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
   96 
   97 #else
   98 
   99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
  100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
  101 
  102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
  103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
  104 
  105 #endif
  106 
  107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
  108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
  109 
  110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
  111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
  112 
  113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
  114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  115 
  116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
  117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
  118 
  119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
  120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
  121 
  122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
  123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
  124 
  125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
  126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
  127 
  128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
  129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 
  132 // Floating Point Registers
  133 
  134 // Specify priority of register selection within phases of register
  135 // allocation.  Highest priority is first.  A useful heuristic is to
  136 // give registers a low priority when they are required by machine
  137 // instructions, like EAX and EDX on I486, and choose no-save registers
  138 // before save-on-call, & save-on-call before save-on-entry.  Registers
  139 // which participate in fixed calling sequences should come last.
  140 // Registers which are used as pairs must fall on an even boundary.
  141 
  142 alloc_class chunk0(R10,         R10_H,
  143                    R11,         R11_H,
  144                    R8,          R8_H,
  145                    R9,          R9_H,
  146                    R12,         R12_H,
  147                    RCX,         RCX_H,
  148                    RBX,         RBX_H,
  149                    RDI,         RDI_H,
  150                    RDX,         RDX_H,
  151                    RSI,         RSI_H,
  152                    RAX,         RAX_H,
  153                    RBP,         RBP_H,
  154                    R13,         R13_H,
  155                    R14,         R14_H,
  156                    R15,         R15_H,
  157                    RSP,         RSP_H);
  158 
  159 
  160 //----------Architecture Description Register Classes--------------------------
  161 // Several register classes are automatically defined based upon information in
  162 // this architecture description.
  163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  164 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  165 //
  166 
  167 // Empty register class.
  168 reg_class no_reg();
  169 
  170 // Class for all pointer/long registers
  171 reg_class all_reg(RAX, RAX_H,
  172                   RDX, RDX_H,
  173                   RBP, RBP_H,
  174                   RDI, RDI_H,
  175                   RSI, RSI_H,
  176                   RCX, RCX_H,
  177                   RBX, RBX_H,
  178                   RSP, RSP_H,
  179                   R8,  R8_H,
  180                   R9,  R9_H,
  181                   R10, R10_H,
  182                   R11, R11_H,
  183                   R12, R12_H,
  184                   R13, R13_H,
  185                   R14, R14_H,
  186                   R15, R15_H);
  187 
  188 // Class for all int registers
  189 reg_class all_int_reg(RAX
  190                       RDX,
  191                       RBP,
  192                       RDI,
  193                       RSI,
  194                       RCX,
  195                       RBX,
  196                       R8,
  197                       R9,
  198                       R10,
  199                       R11,
  200                       R12,
  201                       R13,
  202                       R14);
  203 
  204 // Class for all pointer registers
  205 reg_class any_reg %{
  206   return _ANY_REG_mask;
  207 %}
  208 
  209 // Class for all pointer registers (excluding RSP)
  210 reg_class ptr_reg %{
  211   return _PTR_REG_mask;
  212 %}
  213 
  214 // Class for all pointer registers (excluding RSP and RBP)
  215 reg_class ptr_reg_no_rbp %{
  216   return _PTR_REG_NO_RBP_mask;
  217 %}
  218 
  219 // Class for all pointer registers (excluding RAX and RSP)
  220 reg_class ptr_no_rax_reg %{
  221   return _PTR_NO_RAX_REG_mask;
  222 %}
  223 
  224 // Class for all pointer registers (excluding RAX, RBX, and RSP)
  225 reg_class ptr_no_rax_rbx_reg %{
  226   return _PTR_NO_RAX_RBX_REG_mask;
  227 %}
  228 
  229 // Class for all long registers (excluding RSP)
  230 reg_class long_reg %{
  231   return _LONG_REG_mask;
  232 %}
  233 
  234 // Class for all long registers (excluding RAX, RDX and RSP)
  235 reg_class long_no_rax_rdx_reg %{
  236   return _LONG_NO_RAX_RDX_REG_mask;
  237 %}
  238 
  239 // Class for all long registers (excluding RCX and RSP)
  240 reg_class long_no_rcx_reg %{
  241   return _LONG_NO_RCX_REG_mask;
  242 %}
  243 
  244 // Class for all long registers (excluding RBP and R13)
  245 reg_class long_no_rbp_r13_reg %{
  246   return _LONG_NO_RBP_R13_REG_mask;
  247 %}
  248 
  249 // Class for all int registers (excluding RSP)
  250 reg_class int_reg %{
  251   return _INT_REG_mask;
  252 %}
  253 
  254 // Class for all int registers (excluding RAX, RDX, and RSP)
  255 reg_class int_no_rax_rdx_reg %{
  256   return _INT_NO_RAX_RDX_REG_mask;
  257 %}
  258 
  259 // Class for all int registers (excluding RCX and RSP)
  260 reg_class int_no_rcx_reg %{
  261   return _INT_NO_RCX_REG_mask;
  262 %}
  263 
  264 // Class for all int registers (excluding RBP and R13)
  265 reg_class int_no_rbp_r13_reg %{
  266   return _INT_NO_RBP_R13_REG_mask;
  267 %}
  268 
  269 // Singleton class for RAX pointer register
  270 reg_class ptr_rax_reg(RAX, RAX_H);
  271 
  272 // Singleton class for RBX pointer register
  273 reg_class ptr_rbx_reg(RBX, RBX_H);
  274 
  275 // Singleton class for RSI pointer register
  276 reg_class ptr_rsi_reg(RSI, RSI_H);
  277 
  278 // Singleton class for RBP pointer register
  279 reg_class ptr_rbp_reg(RBP, RBP_H);
  280 
  281 // Singleton class for RDI pointer register
  282 reg_class ptr_rdi_reg(RDI, RDI_H);
  283 
  284 // Singleton class for stack pointer
  285 reg_class ptr_rsp_reg(RSP, RSP_H);
  286 
  287 // Singleton class for TLS pointer
  288 reg_class ptr_r15_reg(R15, R15_H);
  289 
  290 // Singleton class for RAX long register
  291 reg_class long_rax_reg(RAX, RAX_H);
  292 
  293 // Singleton class for RCX long register
  294 reg_class long_rcx_reg(RCX, RCX_H);
  295 
  296 // Singleton class for RDX long register
  297 reg_class long_rdx_reg(RDX, RDX_H);
  298 
  299 // Singleton class for RAX int register
  300 reg_class int_rax_reg(RAX);
  301 
  302 // Singleton class for RBX int register
  303 reg_class int_rbx_reg(RBX);
  304 
  305 // Singleton class for RCX int register
  306 reg_class int_rcx_reg(RCX);
  307 
  308 // Singleton class for RDX int register
  309 reg_class int_rdx_reg(RDX);
  310 
  311 // Singleton class for RDI int register
  312 reg_class int_rdi_reg(RDI);
  313 
  314 // Singleton class for instruction pointer
  315 // reg_class ip_reg(RIP);
  316 
  317 %}
  318 
  319 //----------SOURCE BLOCK-------------------------------------------------------
  320 // This is a block of C++ code which provides values, functions, and
  321 // definitions necessary in the rest of the architecture description
  322 
  323 source_hpp %{
  324 
  325 #include "peephole_x86_64.hpp"
  326 
  327 %}
  328 
  329 // Register masks
  330 source_hpp %{
  331 
  332 extern RegMask _ANY_REG_mask;
  333 extern RegMask _PTR_REG_mask;
  334 extern RegMask _PTR_REG_NO_RBP_mask;
  335 extern RegMask _PTR_NO_RAX_REG_mask;
  336 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
  337 extern RegMask _LONG_REG_mask;
  338 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
  339 extern RegMask _LONG_NO_RCX_REG_mask;
  340 extern RegMask _LONG_NO_RBP_R13_REG_mask;
  341 extern RegMask _INT_REG_mask;
  342 extern RegMask _INT_NO_RAX_RDX_REG_mask;
  343 extern RegMask _INT_NO_RCX_REG_mask;
  344 extern RegMask _INT_NO_RBP_R13_REG_mask;
  345 extern RegMask _FLOAT_REG_mask;
  346 
  347 extern RegMask _STACK_OR_PTR_REG_mask;
  348 extern RegMask _STACK_OR_LONG_REG_mask;
  349 extern RegMask _STACK_OR_INT_REG_mask;
  350 
  351 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
  352 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
  353 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
  354 
  355 %}
  356 
  357 source %{
  358 #define   RELOC_IMM64    Assembler::imm_operand
  359 #define   RELOC_DISP32   Assembler::disp32_operand
  360 
  361 #define __ _masm.
  362 
  363 RegMask _ANY_REG_mask;
  364 RegMask _PTR_REG_mask;
  365 RegMask _PTR_REG_NO_RBP_mask;
  366 RegMask _PTR_NO_RAX_REG_mask;
  367 RegMask _PTR_NO_RAX_RBX_REG_mask;
  368 RegMask _LONG_REG_mask;
  369 RegMask _LONG_NO_RAX_RDX_REG_mask;
  370 RegMask _LONG_NO_RCX_REG_mask;
  371 RegMask _LONG_NO_RBP_R13_REG_mask;
  372 RegMask _INT_REG_mask;
  373 RegMask _INT_NO_RAX_RDX_REG_mask;
  374 RegMask _INT_NO_RCX_REG_mask;
  375 RegMask _INT_NO_RBP_R13_REG_mask;
  376 RegMask _FLOAT_REG_mask;
  377 RegMask _STACK_OR_PTR_REG_mask;
  378 RegMask _STACK_OR_LONG_REG_mask;
  379 RegMask _STACK_OR_INT_REG_mask;
  380 
  381 static bool need_r12_heapbase() {
  382   return UseCompressedOops;
  383 }
  384 
  385 void reg_mask_init() {
  386   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
  387   // We derive a number of subsets from it.
  388   _ANY_REG_mask = _ALL_REG_mask;
  389 
  390   if (PreserveFramePointer) {
  391     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  392     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  393   }
  394   if (need_r12_heapbase()) {
  395     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  396     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
  397   }
  398 
  399   _PTR_REG_mask = _ANY_REG_mask;
  400   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
  401   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
  402   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
  403   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
  404 
  405   _STACK_OR_PTR_REG_mask = _PTR_REG_mask;
  406   _STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  407 
  408   _PTR_REG_NO_RBP_mask = _PTR_REG_mask;
  409   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  410   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  411 
  412   _PTR_NO_RAX_REG_mask = _PTR_REG_mask;
  413   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  414   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  415 
  416   _PTR_NO_RAX_RBX_REG_mask = _PTR_NO_RAX_REG_mask;
  417   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  418   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
  419 
  420   _LONG_REG_mask = _PTR_REG_mask;
  421   _STACK_OR_LONG_REG_mask = _LONG_REG_mask;
  422   _STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  423 
  424   _LONG_NO_RAX_RDX_REG_mask = _LONG_REG_mask;
  425   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  426   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  427   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  428   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
  429 
  430   _LONG_NO_RCX_REG_mask = _LONG_REG_mask;
  431   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  432   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
  433 
  434   _LONG_NO_RBP_R13_REG_mask = _LONG_REG_mask;
  435   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  436   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  437   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  438   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
  439 
  440   _INT_REG_mask = _ALL_INT_REG_mask;
  441   if (PreserveFramePointer) {
  442     _INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  443   }
  444   if (need_r12_heapbase()) {
  445     _INT_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  446   }
  447 
  448   _STACK_OR_INT_REG_mask = _INT_REG_mask;
  449   _STACK_OR_INT_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  450 
  451   _INT_NO_RAX_RDX_REG_mask = _INT_REG_mask;
  452   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  453   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  454 
  455   _INT_NO_RCX_REG_mask = _INT_REG_mask;
  456   _INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  457 
  458   _INT_NO_RBP_R13_REG_mask = _INT_REG_mask;
  459   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  460   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  461 
  462   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  463   // from the float_reg_legacy/float_reg_evex register class.
  464   _FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
  465 }
  466 
  467 static bool generate_vzeroupper(Compile* C) {
  468   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
  469 }
  470 
  471 static int clear_avx_size() {
  472   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {
  493   int offset = 13; // movq r10,#addr; callq (r10)
  494   if (this->ideal_Opcode() != Op_CallLeafVector) {
  495     offset += clear_avx_size();
  496   }
  497   return offset;
  498 }
  499 //
  500 // Compute padding required for nodes which need alignment
  501 //
  502 
  503 // The address of the call instruction needs to be 4-byte aligned to
  504 // ensure that it does not span a cache line so that it can be patched.
  505 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  506 {
  507   current_offset += clear_avx_size(); // skip vzeroupper
  508   current_offset += 1; // skip call opcode byte
  509   return align_up(current_offset, alignment_required()) - current_offset;
  510 }
  511 
  512 // The address of the call instruction needs to be 4-byte aligned to
  513 // ensure that it does not span a cache line so that it can be patched.
  514 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  515 {
  516   current_offset += clear_avx_size(); // skip vzeroupper
  517   current_offset += 11; // skip movq instruction + call opcode byte
  518   return align_up(current_offset, alignment_required()) - current_offset;
  519 }
  520 
  521 // EMIT_RM()
  522 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  523   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
  524   cbuf.insts()->emit_int8(c);
  525 }
  526 
  527 // EMIT_CC()
  528 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  529   unsigned char c = (unsigned char) (f1 | f2);
  530   cbuf.insts()->emit_int8(c);
  531 }
  532 
  533 // EMIT_OPCODE()
  534 void emit_opcode(CodeBuffer &cbuf, int code) {
  535   cbuf.insts()->emit_int8((unsigned char) code);
  536 }
  537 
  538 // EMIT_OPCODE() w/ relocation information
  539 void emit_opcode(CodeBuffer &cbuf,
  540                  int code, relocInfo::relocType reloc, int offset, int format)
  541 {
  542   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
  543   emit_opcode(cbuf, code);
  544 }
  545 
  546 // EMIT_D8()
  547 void emit_d8(CodeBuffer &cbuf, int d8) {
  548   cbuf.insts()->emit_int8((unsigned char) d8);
  549 }
  550 
  551 // EMIT_D16()
  552 void emit_d16(CodeBuffer &cbuf, int d16) {
  553   cbuf.insts()->emit_int16(d16);
  554 }
  555 
  556 // EMIT_D32()
  557 void emit_d32(CodeBuffer &cbuf, int d32) {
  558   cbuf.insts()->emit_int32(d32);
  559 }
  560 
  561 // EMIT_D64()
  562 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
  563   cbuf.insts()->emit_int64(d64);
  564 }
  565 
  566 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  567 void emit_d32_reloc(CodeBuffer& cbuf,
  568                     int d32,
  569                     relocInfo::relocType reloc,
  570                     int format)
  571 {
  572   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
  573   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  574   cbuf.insts()->emit_int32(d32);
  575 }
  576 
  577 // emit 32 bit value and construct relocation entry from RelocationHolder
  578 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
  579 #ifdef ASSERT
  580   if (rspec.reloc()->type() == relocInfo::oop_type &&
  581       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
  582     assert(Universe::heap()->is_in((address)(intptr_t)d32), "should be real oop");
  583     assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)), "cannot embed broken oops in code");
  584   }
  585 #endif
  586   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  587   cbuf.insts()->emit_int32(d32);
  588 }
  589 
  590 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
  591   address next_ip = cbuf.insts_end() + 4;
  592   emit_d32_reloc(cbuf, (int) (addr - next_ip),
  593                  external_word_Relocation::spec(addr),
  594                  RELOC_DISP32);
  595 }
  596 
  597 
  598 // emit 64 bit value and construct relocation entry from relocInfo::relocType
  599 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
  600   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  601   cbuf.insts()->emit_int64(d64);
  602 }
  603 
  604 // emit 64 bit value and construct relocation entry from RelocationHolder
  605 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
  606 #ifdef ASSERT
  607   if (rspec.reloc()->type() == relocInfo::oop_type &&
  608       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
  609     assert(Universe::heap()->is_in((address)d64), "should be real oop");
  610     assert(oopDesc::is_oop(cast_to_oop(d64)), "cannot embed broken oops in code");
  611   }
  612 #endif
  613   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  614   cbuf.insts()->emit_int64(d64);
  615 }
  616 
  617 // Access stack slot for load or store
  618 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
  619 {
  620   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
  621   if (-0x80 <= disp && disp < 0x80) {
  622     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
  623     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  624     emit_d8(cbuf, disp);     // Displacement  // R/M byte
  625   } else {
  626     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
  627     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  628     emit_d32(cbuf, disp);     // Displacement // R/M byte
  629   }
  630 }
  631 
  632    // rRegI ereg, memory mem) %{    // emit_reg_mem
  633 void encode_RegMem(CodeBuffer &cbuf,
  634                    int reg,
  635                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
  636 {
  637   assert(disp_reloc == relocInfo::none, "cannot have disp");
  638   int regenc = reg & 7;
  639   int baseenc = base & 7;
  640   int indexenc = index & 7;
  641 
  642   // There is no index & no scale, use form without SIB byte
  643   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
  644     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  645     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  646       emit_rm(cbuf, 0x0, regenc, baseenc); // *
  647     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  648       // If 8-bit displacement, mode 0x1
  649       emit_rm(cbuf, 0x1, regenc, baseenc); // *
  650       emit_d8(cbuf, disp);
  651     } else {
  652       // If 32-bit displacement
  653       if (base == -1) { // Special flag for absolute address
  654         emit_rm(cbuf, 0x0, regenc, 0x5); // *
  655         if (disp_reloc != relocInfo::none) {
  656           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  657         } else {
  658           emit_d32(cbuf, disp);
  659         }
  660       } else {
  661         // Normal base + offset
  662         emit_rm(cbuf, 0x2, regenc, baseenc); // *
  663         if (disp_reloc != relocInfo::none) {
  664           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  665         } else {
  666           emit_d32(cbuf, disp);
  667         }
  668       }
  669     }
  670   } else {
  671     // Else, encode with the SIB byte
  672     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  673     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  674       // If no displacement
  675       emit_rm(cbuf, 0x0, regenc, 0x4); // *
  676       emit_rm(cbuf, scale, indexenc, baseenc);
  677     } else {
  678       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  679         // If 8-bit displacement, mode 0x1
  680         emit_rm(cbuf, 0x1, regenc, 0x4); // *
  681         emit_rm(cbuf, scale, indexenc, baseenc);
  682         emit_d8(cbuf, disp);
  683       } else {
  684         // If 32-bit displacement
  685         if (base == 0x04 ) {
  686           emit_rm(cbuf, 0x2, regenc, 0x4);
  687           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
  688         } else {
  689           emit_rm(cbuf, 0x2, regenc, 0x4);
  690           emit_rm(cbuf, scale, indexenc, baseenc); // *
  691         }
  692         if (disp_reloc != relocInfo::none) {
  693           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  694         } else {
  695           emit_d32(cbuf, disp);
  696         }
  697       }
  698     }
  699   }
  700 }
  701 
  702 // This could be in MacroAssembler but it's fairly C2 specific
  703 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  704   Label exit;
  705   __ jccb(Assembler::noParity, exit);
  706   __ pushf();
  707   //
  708   // comiss/ucomiss instructions set ZF,PF,CF flags and
  709   // zero OF,AF,SF for NaN values.
  710   // Fixup flags by zeroing ZF,PF so that compare of NaN
  711   // values returns 'less than' result (CF is set).
  712   // Leave the rest of flags unchanged.
  713   //
  714   //    7 6 5 4 3 2 1 0
  715   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  716   //    0 0 1 0 1 0 1 1   (0x2B)
  717   //
  718   __ andq(Address(rsp, 0), 0xffffff2b);
  719   __ popf();
  720   __ bind(exit);
  721 }
  722 
  723 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  724   Label done;
  725   __ movl(dst, -1);
  726   __ jcc(Assembler::parity, done);
  727   __ jcc(Assembler::below, done);
  728   __ setb(Assembler::notEqual, dst);
  729   __ movzbl(dst, dst);
  730   __ bind(done);
  731 }
  732 
  733 // Math.min()    # Math.max()
  734 // --------------------------
  735 // ucomis[s/d]   #
  736 // ja   -> b     # a
  737 // jp   -> NaN   # NaN
  738 // jb   -> a     # b
  739 // je            #
  740 // |-jz -> a | b # a & b
  741 // |    -> a     #
  742 void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
  743                      XMMRegister a, XMMRegister b,
  744                      XMMRegister xmmt, Register rt,
  745                      bool min, bool single) {
  746 
  747   Label nan, zero, below, above, done;
  748 
  749   if (single)
  750     __ ucomiss(a, b);
  751   else
  752     __ ucomisd(a, b);
  753 
  754   if (dst->encoding() != (min ? b : a)->encoding())
  755     __ jccb(Assembler::above, above); // CF=0 & ZF=0
  756   else
  757     __ jccb(Assembler::above, done);
  758 
  759   __ jccb(Assembler::parity, nan);  // PF=1
  760   __ jccb(Assembler::below, below); // CF=1
  761 
  762   // equal
  763   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  764   if (single) {
  765     __ ucomiss(a, xmmt);
  766     __ jccb(Assembler::equal, zero);
  767 
  768     __ movflt(dst, a);
  769     __ jmp(done);
  770   }
  771   else {
  772     __ ucomisd(a, xmmt);
  773     __ jccb(Assembler::equal, zero);
  774 
  775     __ movdbl(dst, a);
  776     __ jmp(done);
  777   }
  778 
  779   __ bind(zero);
  780   if (min)
  781     __ vpor(dst, a, b, Assembler::AVX_128bit);
  782   else
  783     __ vpand(dst, a, b, Assembler::AVX_128bit);
  784 
  785   __ jmp(done);
  786 
  787   __ bind(above);
  788   if (single)
  789     __ movflt(dst, min ? b : a);
  790   else
  791     __ movdbl(dst, min ? b : a);
  792 
  793   __ jmp(done);
  794 
  795   __ bind(nan);
  796   if (single) {
  797     __ movl(rt, 0x7fc00000); // Float.NaN
  798     __ movdl(dst, rt);
  799   }
  800   else {
  801     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
  802     __ movdq(dst, rt);
  803   }
  804   __ jmp(done);
  805 
  806   __ bind(below);
  807   if (single)
  808     __ movflt(dst, min ? a : b);
  809   else
  810     __ movdbl(dst, min ? a : b);
  811 
  812   __ bind(done);
  813 }
  814 
  815 //=============================================================================
  816 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  817 
  818 int ConstantTable::calculate_table_base_offset() const {
  819   return 0;  // absolute addressing, no offset
  820 }
  821 
  822 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  823 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  824   ShouldNotReachHere();
  825 }
  826 
  827 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  828   // Empty encoding
  829 }
  830 
  831 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  832   return 0;
  833 }
  834 
  835 #ifndef PRODUCT
  836 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  837   st->print("# MachConstantBaseNode (empty encoding)");
  838 }
  839 #endif
  840 
  841 
  842 //=============================================================================
  843 #ifndef PRODUCT
  844 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  845   Compile* C = ra_->C;
  846 
  847   int framesize = C->output()->frame_size_in_bytes();
  848   int bangsize = C->output()->bang_size_in_bytes();
  849   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  850   // Remove wordSize for return addr which is already pushed.
  851   framesize -= wordSize;
  852 
  853   if (C->output()->need_stack_bang(bangsize)) {
  854     framesize -= wordSize;
  855     st->print("# stack bang (%d bytes)", bangsize);
  856     st->print("\n\t");
  857     st->print("pushq   rbp\t# Save rbp");
  858     if (PreserveFramePointer) {
  859         st->print("\n\t");
  860         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  861     }
  862     if (framesize) {
  863       st->print("\n\t");
  864       st->print("subq    rsp, #%d\t# Create frame",framesize);
  865     }
  866   } else {
  867     st->print("subq    rsp, #%d\t# Create frame",framesize);
  868     st->print("\n\t");
  869     framesize -= wordSize;
  870     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  871     if (PreserveFramePointer) {
  872       st->print("\n\t");
  873       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  874       if (framesize > 0) {
  875         st->print("\n\t");
  876         st->print("addq    rbp, #%d", framesize);
  877       }
  878     }
  879   }
  880 
  881   if (VerifyStackAtCalls) {
  882     st->print("\n\t");
  883     framesize -= wordSize;
  884     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  885 #ifdef ASSERT
  886     st->print("\n\t");
  887     st->print("# stack alignment check");
  888 #endif
  889   }
  890   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  891     st->print("\n\t");
  892     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  893     st->print("\n\t");
  894     st->print("je      fast_entry\t");
  895     st->print("\n\t");
  896     st->print("call    #nmethod_entry_barrier_stub\t");
  897     st->print("\n\tfast_entry:");
  898   }
  899   st->cr();
  900 }
  901 #endif
  902 
  903 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  904   Compile* C = ra_->C;
  905   C2_MacroAssembler _masm(&cbuf);
  906 
  907   int framesize = C->output()->frame_size_in_bytes();
  908   int bangsize = C->output()->bang_size_in_bytes();
  909 
  910   if (C->clinit_barrier_on_entry()) {
  911     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  912     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  913 
  914     Label L_skip_barrier;
  915     Register klass = rscratch1;
  916 
  917     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  918     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  919 
  920     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  921 
  922     __ bind(L_skip_barrier);
  923   }
  924 
  925   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);
  926 
  927   C->output()->set_frame_complete(cbuf.insts_size());
  928 
  929   if (C->has_mach_constant_base_node()) {
  930     // NOTE: We set the table base offset here because users might be
  931     // emitted before MachConstantBaseNode.
  932     ConstantTable& constant_table = C->output()->constant_table();
  933     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  934   }
  935 }
  936 
  937 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
  938 {
  939   return MachNode::size(ra_); // too many variables; just compute it
  940                               // the hard way
  941 }
  942 
  943 int MachPrologNode::reloc() const
  944 {
  945   return 0; // a large enough number
  946 }
  947 
  948 //=============================================================================
  949 #ifndef PRODUCT
  950 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  951 {
  952   Compile* C = ra_->C;
  953   if (generate_vzeroupper(C)) {
  954     st->print("vzeroupper");
  955     st->cr(); st->print("\t");
  956   }
  957 
  958   int framesize = C->output()->frame_size_in_bytes();
  959   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  960   // Remove word for return adr already pushed
  961   // and RBP
  962   framesize -= 2*wordSize;
  963 
  964   if (framesize) {
  965     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
  966     st->print("\t");
  967   }
  968 
  969   st->print_cr("popq    rbp");
  970   if (do_polling() && C->is_method_compilation()) {
  971     st->print("\t");
  972     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  973                  "ja      #safepoint_stub\t"
  974                  "# Safepoint: poll for GC");
  975   }
  976 }
  977 #endif
  978 
  979 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  980 {
  981   Compile* C = ra_->C;
  982   MacroAssembler _masm(&cbuf);
  983 
  984   if (generate_vzeroupper(C)) {
  985     // Clear upper bits of YMM registers when current compiled code uses
  986     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  987     __ vzeroupper();
  988   }
  989 
  990   int framesize = C->output()->frame_size_in_bytes();
  991   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  992   // Remove word for return adr already pushed
  993   // and RBP
  994   framesize -= 2*wordSize;
  995 
  996   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  997 
  998   if (framesize) {
  999     emit_opcode(cbuf, Assembler::REX_W);
 1000     if (framesize < 0x80) {
 1001       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 1002       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1003       emit_d8(cbuf, framesize);
 1004     } else {
 1005       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 1006       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 1007       emit_d32(cbuf, framesize);
 1008     }
 1009   }
 1010 
 1011   // popq rbp
 1012   emit_opcode(cbuf, 0x58 | RBP_enc);
 1013 
 1014   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1015     __ reserved_stack_check();
 1016   }
 1017 
 1018   if (do_polling() && C->is_method_compilation()) {
 1019     MacroAssembler _masm(&cbuf);
 1020     Label dummy_label;
 1021     Label* code_stub = &dummy_label;
 1022     if (!C->output()->in_scratch_emit_size()) {
 1023       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1024       C->output()->add_stub(stub);
 1025       code_stub = &stub->entry();
 1026     }
 1027     __ relocate(relocInfo::poll_return_type);
 1028     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
 1029   }
 1030 }
 1031 
 1032 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1033 {
 1034   return MachNode::size(ra_); // too many variables; just compute it
 1035                               // the hard way
 1036 }
 1037 
 1038 int MachEpilogNode::reloc() const
 1039 {
 1040   return 2; // a large enough number
 1041 }
 1042 
 1043 const Pipeline* MachEpilogNode::pipeline() const
 1044 {
 1045   return MachNode::pipeline_class();
 1046 }
 1047 
 1048 //=============================================================================
 1049 
// Location classes of an OptoReg, as computed by rc_class() and used by
// MachSpillCopyNode::implementation() to choose the kind of move.
enum RC {
  rc_bad,    // not a valid OptoReg
  rc_int,    // general-purpose register (VMReg is_Register)
  rc_kreg,   // mask register (VMReg is_KRegister)
  rc_float,  // XMM register (VMReg is_XMMRegister)
  rc_stack   // stack slot
};
 1057 
 1058 static enum RC rc_class(OptoReg::Name reg)
 1059 {
 1060   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 1061 
 1062   if (OptoReg::is_stack(reg)) return rc_stack;
 1063 
 1064   VMReg r = OptoReg::as_VMReg(reg);
 1065 
 1066   if (r->is_Register()) return rc_int;
 1067 
 1068   if (r->is_KRegister()) return rc_kreg;
 1069 
 1070   assert(r->is_XMMRegister(), "must be");
 1071   return rc_float;
 1072 }
 1073 
// The next two helpers are shared by the 32- and 64-bit VM. They are only
// declared here; their definitions live in x86.ad.
//
// vec_mov_helper: vector register-to-register copy. In this file it is
// called with a null 'cbuf' when only the textual form is wanted on 'st'.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// vec_spill_helper: vector move between register 'reg' and the stack slot at
// 'stack_offset'; 'is_load' selects stack -> register (true) or
// register -> stack (false), matching its two call sites in this file.
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
 1080 
// Copies a vector value between two rsp-relative stack slots.
//
//   cbuf        non-null: emit the machine code into it.
//               null:     (non-PRODUCT builds only) print the equivalent
//                         pseudo-assembly to 'st'; in PRODUCT builds the
//                         printing branch is compiled out and nothing happens.
//   src_offset  offset of the source slot from rsp
//   dst_offset  offset of the destination slot from rsp
//   ireg        ideal vector register kind: Op_VecS/D/X/Y/Z; anything else
//               hits ShouldNotReachHere()
//   st          output stream for the textual form
//
// VecD and VecX are moved with pushq/popq on memory operands. VecS, VecY and
// VecZ need a temporary register (rax or xmm0); its previous contents are
// parked just below rsp ([rsp - 8/32/64]) and restored afterwards.
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS:
      // 32-bit copy through rax; rax is saved/restored at [rsp - 8].
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD:
      // 64-bit copy: push the source slot, pop into the destination slot.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX:
      // 128-bit copy as two 64-bit push/pop pairs (low half, then high half).
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY:
      // 256-bit copy through xmm0; xmm0 is saved/restored at [rsp - 32].
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      // 512-bit copy through xmm0; xmm0 is saved/restored at [rsp - 64].
      // The trailing 2 is presumably the 512-bit vector-length selector -
      // TODO confirm against the Assembler vector-length enum.
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Textual form only; mirrors the instruction sequences emitted above.
    switch (ireg) {
    case Op_VecS:
      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl    rax, [rsp + #%d]\n\t"
                "movl    [rsp + #%d], rax\n\t"
                "movq    rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset);
      break;
     case Op_VecX:
      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushq   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): the text says "vmovdqu" although the emitter above
      // uses evmovdquq for this case.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
 1159 
// Shared worker behind MachSpillCopyNode::emit() and ::format(): moves the
// value of in(1) from the location the register allocator assigned to it
// into the location assigned to this node.
//
//   cbuf     non-null: emit machine code into it.
//            null:     (non-PRODUCT builds only) print the instruction text to
//                      'st' instead; at least one of cbuf/st must be non-null.
//   ra_      register allocator, queried for the first/second OptoReg of
//            source and destination and for stack offsets.
//   do_size  not used anywhere in this body.
//   st       output stream for the textual form.
//
// Returns 0 on every path.
//
// Dispatch is on the location class (see rc_class) of the first source and
// destination registers. Within each case the move is 64-bit when both source
// and destination form an aligned pair (first is even and second == first+1),
// otherwise it is 32-bit. Vector values (other than vector masks) are handed
// to the vec_* helpers.
uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(cbuf != NULL || st  != NULL, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector values (but not vector masks) are moved by the vector helpers.
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      // xmm -> xmm
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      // xmm -> mem (store)
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      // mem -> xmm (load)
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq    [rsp + #%d]",
                     src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // copy through rax, parking its old value at [rsp - 8] meanwhile.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl    rax, [rsp + #%d]\n\t"
                    "movl    [rsp + #%d], rax\n\t"
                    "movq    rax, [rsp - #8]",
                     src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, [rsp + #%d]\t# spill",
                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // mem -> kreg
      // Only the 64-bit pair case is handled; any other shape emits nothing.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
  #ifndef PRODUCT
        } else {
           st->print("kmovq   %s, %s\t# spill",
                       Matcher::regName[dst_first],
                       Matcher::regName[src_first]);
  #endif
        }
      }
      // NOTE(review): Unimplemented() is reached unconditionally here, even
      // after the 64-bit move above has been emitted/printed - confirm this
      // path is really meant to be unsupported.
      Unimplemented();
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // xmm -> kreg is not a legal spill copy.
      assert(false, "Illegal spilling");
      return 0;
    }
  } else if (src_first_rc == rc_kreg) {
    // kreg ->
    if (dst_first_rc == rc_stack) {
      // kreg -> mem
      // Only the 64-bit pair case is handled; any other shape emits nothing.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   [rsp + #%d] , %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // kreg -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): Unimplemented() is reached unconditionally here, even
      // after the 64-bit move above has been emitted/printed - confirm this
      // path is really meant to be unsupported.
      Unimplemented();
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // kreg -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // kreg -> xmm is not a legal spill copy.
      assert(false, "Illegal spill");
      return 0;
    }
  }

  // No source/destination class combination matched above.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
 1619 
 1620 #ifndef PRODUCT
 1621 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1622   implementation(NULL, ra_, false, st);
 1623 }
 1624 #endif
 1625 
 1626 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1627   implementation(&cbuf, ra_, false, NULL);
 1628 }
 1629 
 1630 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1631   return MachNode::size(ra_);
 1632 }
 1633 
 1634 //=============================================================================
 1635 #ifndef PRODUCT
 1636 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1637 {
 1638   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1639   int reg = ra_->get_reg_first(this);
 1640   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1641             Matcher::regName[reg], offset);
 1642 }
 1643 #endif
 1644 
 1645 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1646 {
 1647   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1648   int reg = ra_->get_encode(this);
 1649   if (offset >= 0x80) {
 1650     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1651     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1652     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1653     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1654     emit_d32(cbuf, offset);
 1655   } else {
 1656     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1657     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1658     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1659     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1660     emit_d8(cbuf, offset);
 1661   }
 1662 }
 1663 
 1664 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1665 {
 1666   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1667   return (offset < 0x80) ? 5 : 8; // REX
 1668 }
 1669 
 1670 //=============================================================================
 1671 #ifndef PRODUCT
 1672 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1673 {
 1674   if (UseCompressedClassPointers) {
 1675     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1676     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1677     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1678   } else {
 1679     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1680                  "# Inline cache check");
 1681   }
 1682   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1683   st->print_cr("\tnop\t# nops to align entry point");
 1684 }
 1685 #endif
 1686 
 1687 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1688 {
 1689   MacroAssembler masm(&cbuf);
 1690   uint insts_size = cbuf.insts_size();
 1691   if (UseCompressedClassPointers) {
 1692     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1693     masm.cmpptr(rax, rscratch1);
 1694   } else {
 1695     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1696   }
 1697 
 1698   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1699 
 1700   /* WARNING these NOPs are critical so that verified entry point is properly
 1701      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1702   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1703   if (OptoBreakpoint) {
 1704     // Leave space for int3
 1705     nops_cnt -= 1;
 1706   }
 1707   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1708   if (nops_cnt > 0)
 1709     masm.nop(nops_cnt);
 1710 }
 1711 
 1712 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 1713 {
 1714   return MachNode::size(ra_); // too many variables; just compute it
 1715                               // the hard way
 1716 }
 1717 
 1718 
 1719 //=============================================================================
 1720 
 1721 bool Matcher::supports_vector_calling_convention(void) {
 1722   if (EnableVectorSupport && UseVectorStubs) {
 1723     return true;
 1724   }
 1725   return false;
 1726 }
 1727 
 1728 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1729   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1730   int lo = XMM0_num;
 1731   int hi = XMM0b_num;
 1732   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1733   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1734   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1735   return OptoRegPair(hi, lo);
 1736 }
 1737 
 1738 // Is this branch offset short enough that a short branch can be used?
 1739 //
 1740 // NOTE: If the platform does not provide any short branch variants, then
 1741 //       this method should return false for offset 0.
 1742 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1743   // The passed offset is relative to address of the branch.
 1744   // On 86 a branch displacement is calculated relative to address
 1745   // of a next instruction.
 1746   offset -= br_size;
 1747 
 1748   // the short version of jmpConUCF2 contains multiple branches,
 1749   // making the reach slightly less
 1750   if (rule == jmpConUCF2_rule)
 1751     return (-126 <= offset && offset <= 125);
 1752   return (-128 <= offset && offset <= 127);
 1753 }
 1754 
 1755 // Return whether or not this register is ever used as an argument.
 1756 // This function is used on startup to build the trampoline stubs in
 1757 // generateOptoStub.  Registers not mentioned will be killed by the VM
 1758 // call in the trampoline, and arguments in those registers not be
 1759 // available to the callee.
 1760 bool Matcher::can_be_java_arg(int reg)
 1761 {
 1762   return
 1763     reg ==  RDI_num || reg == RDI_H_num ||
 1764     reg ==  RSI_num || reg == RSI_H_num ||
 1765     reg ==  RDX_num || reg == RDX_H_num ||
 1766     reg ==  RCX_num || reg == RCX_H_num ||
 1767     reg ==   R8_num || reg ==  R8_H_num ||
 1768     reg ==   R9_num || reg ==  R9_H_num ||
 1769     reg ==  R12_num || reg == R12_H_num ||
 1770     reg == XMM0_num || reg == XMM0b_num ||
 1771     reg == XMM1_num || reg == XMM1b_num ||
 1772     reg == XMM2_num || reg == XMM2b_num ||
 1773     reg == XMM3_num || reg == XMM3b_num ||
 1774     reg == XMM4_num || reg == XMM4b_num ||
 1775     reg == XMM5_num || reg == XMM5b_num ||
 1776     reg == XMM6_num || reg == XMM6b_num ||
 1777     reg == XMM7_num || reg == XMM7b_num;
 1778 }
 1779 
 1780 bool Matcher::is_spillable_arg(int reg)
 1781 {
 1782   return can_be_java_arg(reg);
 1783 }
 1784 
 1785 uint Matcher::int_pressure_limit()
 1786 {
 1787   return (INTPRESSURE == -1) ? _INT_REG_mask.Size() : INTPRESSURE;
 1788 }
 1789 
 1790 uint Matcher::float_pressure_limit()
 1791 {
 1792   // After experiment around with different values, the following default threshold
 1793   // works best for LCM's register pressure scheduling on x64.
 1794   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 1795   uint default_float_pressure_threshold = _FLOAT_REG_mask.Size() - dec_count;
 1796   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 1797 }
 1798 
 1799 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1800   // In 64 bit mode a code which use multiply when
 1801   // devisor is constant is faster than hardware
 1802   // DIV instruction (it uses MulHiL).
 1803   return false;
 1804 }
 1805 
 1806 // Register for DIVI projection of divmodI
 1807 RegMask Matcher::divI_proj_mask() {
 1808   return INT_RAX_REG_mask();
 1809 }
 1810 
 1811 // Register for MODI projection of divmodI
 1812 RegMask Matcher::modI_proj_mask() {
 1813   return INT_RDX_REG_mask();
 1814 }
 1815 
 1816 // Register for DIVL projection of divmodL
 1817 RegMask Matcher::divL_proj_mask() {
 1818   return LONG_RAX_REG_mask();
 1819 }
 1820 
 1821 // Register for MODL projection of divmodL
 1822 RegMask Matcher::modL_proj_mask() {
 1823   return LONG_RDX_REG_mask();
 1824 }
 1825 
 1826 // Register for saving SP into on method handle invokes. Not used on x86_64.
 1827 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1828     return NO_REG_mask();
 1829 }
 1830 
 1831 %}
 1832 
 1833 //----------ENCODING BLOCK-----------------------------------------------------
 1834 // This block specifies the encoding classes used by the compiler to
 1835 // output byte streams.  Encoding classes are parameterized macros
 1836 // used by Machine Instruction Nodes in order to generate the bit
 1837 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 1841 // which returns its register number when queried.  CONST_INTER causes
 1842 // an operand to generate a function which returns the value of the
 1843 // constant when queried.  MEMORY_INTER causes an operand to generate
 1844 // four functions which return the Base Register, the Index Register,
 1845 // the Scale Value, and the Offset Value of the operand when queried.
 1846 // COND_INTER causes an operand to generate six functions which return
 1847 // the encoding code (ie - encoding bits for the instruction)
 1848 // associated with each basic boolean condition for a conditional
 1849 // instruction.
 1850 //
 1851 // Instructions specify two basic values for encoding.  Again, a
 1852 // function is available to check if the constant displacement is an
 1853 // oop. They use the ins_encode keyword to specify their encoding
 1854 // classes (which must be a sequence of enc_class names, and their
 1855 // parameters, specified in the encoding block), and they use the
 1856 // opcode keyword to specify, in order, their primary, secondary, and
 1857 // tertiary opcode.  Only the opcode sections which a particular
 1858 // instruction needs for encoding need to be specified.
 1859 encode %{
 1860   // Build emit functions for each basic byte or larger field in the
 1861   // intel encoding scheme (opcode, rm, sib, immediate), and call them
 1862   // from C++ code in the enc_class source block.  Emit functions will
 1863   // live in the main source block for now.  In future, we can
 1864   // generalize this by adding a syntax that specifies the sizes of
 1865   // fields in an order, so that the adlc can build the emit functions
 1866   // automagically
 1867 
 1868   // Emit primary opcode
 1869   enc_class OpcP
 1870   %{
 1871     emit_opcode(cbuf, $primary);
 1872   %}
 1873 
 1874   // Emit secondary opcode
 1875   enc_class OpcS
 1876   %{
 1877     emit_opcode(cbuf, $secondary);
 1878   %}
 1879 
 1880   // Emit tertiary opcode
 1881   enc_class OpcT
 1882   %{
 1883     emit_opcode(cbuf, $tertiary);
 1884   %}
 1885 
 1886   // Emit opcode directly
 1887   enc_class Opcode(immI d8)
 1888   %{
 1889     emit_opcode(cbuf, $d8$$constant);
 1890   %}
 1891 
 1892   // Emit size prefix
 1893   enc_class SizePrefix
 1894   %{
 1895     emit_opcode(cbuf, 0x66);
 1896   %}
 1897 
 1898   enc_class reg(rRegI reg)
 1899   %{
 1900     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
 1901   %}
 1902 
 1903   enc_class reg_reg(rRegI dst, rRegI src)
 1904   %{
 1905     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
 1906   %}
 1907 
 1908   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
 1909   %{
 1910     emit_opcode(cbuf, $opcode$$constant);
 1911     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
 1912   %}
 1913 
 1914   enc_class cdql_enc(no_rax_rdx_RegI div)
 1915   %{
 1916     // Full implementation of Java idiv and irem; checks for
 1917     // special case as described in JVM spec., p.243 & p.271.
 1918     //
 1919     //         normal case                           special case
 1920     //
 1921     // input : rax: dividend                         min_int
 1922     //         reg: divisor                          -1
 1923     //
 1924     // output: rax: quotient  (= rax idiv reg)       min_int
 1925     //         rdx: remainder (= rax irem reg)       0
 1926     //
 1927     //  Code sequnce:
 1928     //
 1929     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 1930     //    5:   75 07/08                jne    e <normal>
 1931     //    7:   33 d2                   xor    %edx,%edx
 1932     //  [div >= 8 -> offset + 1]
 1933     //  [REX_B]
 1934     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 1935     //    c:   74 03/04                je     11 <done>
 1936     // 000000000000000e <normal>:
 1937     //    e:   99                      cltd
 1938     //  [div >= 8 -> offset + 1]
 1939     //  [REX_B]
 1940     //    f:   f7 f9                   idiv   $div
 1941     // 0000000000000011 <done>:
 1942     MacroAssembler _masm(&cbuf);
 1943     Label normal;
 1944     Label done;
 1945 
 1946     // cmp    $0x80000000,%eax
 1947     __ cmpl(as_Register(RAX_enc), 0x80000000);
 1948 
 1949     // jne    e <normal>
 1950     __ jccb(Assembler::notEqual, normal);
 1951 
 1952     // xor    %edx,%edx
 1953     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 1954 
 1955     // cmp    $0xffffffffffffffff,%ecx
 1956     __ cmpl($div$$Register, -1);
 1957 
 1958     // je     11 <done>
 1959     __ jccb(Assembler::equal, done);
 1960 
 1961     // <normal>
 1962     // cltd
 1963     __ bind(normal);
 1964     __ cdql();
 1965 
 1966     // idivl
 1967     // <done>
 1968     __ idivl($div$$Register);
 1969     __ bind(done);
 1970   %}
 1971 
 1972   enc_class cdqq_enc(no_rax_rdx_RegL div)
 1973   %{
 1974     // Full implementation of Java ldiv and lrem; checks for
 1975     // special case as described in JVM spec., p.243 & p.271.
 1976     //
 1977     //         normal case                           special case
 1978     //
 1979     // input : rax: dividend                         min_long
 1980     //         reg: divisor                          -1
 1981     //
 1982     // output: rax: quotient  (= rax idiv reg)       min_long
 1983     //         rdx: remainder (= rax irem reg)       0
 1984     //
 1985     //  Code sequnce:
 1986     //
 1987     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 1988     //    7:   00 00 80
 1989     //    a:   48 39 d0                cmp    %rdx,%rax
 1990     //    d:   75 08                   jne    17 <normal>
 1991     //    f:   33 d2                   xor    %edx,%edx
 1992     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 1993     //   15:   74 05                   je     1c <done>
 1994     // 0000000000000017 <normal>:
 1995     //   17:   48 99                   cqto
 1996     //   19:   48 f7 f9                idiv   $div
 1997     // 000000000000001c <done>:
 1998     MacroAssembler _masm(&cbuf);
 1999     Label normal;
 2000     Label done;
 2001 
 2002     // mov    $0x8000000000000000,%rdx
 2003     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 2004 
 2005     // cmp    %rdx,%rax
 2006     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 2007 
 2008     // jne    17 <normal>
 2009     __ jccb(Assembler::notEqual, normal);
 2010 
 2011     // xor    %edx,%edx
 2012     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 2013 
 2014     // cmp    $0xffffffffffffffff,$div
 2015     __ cmpq($div$$Register, -1);
 2016 
 2017     // je     1e <done>
 2018     __ jccb(Assembler::equal, done);
 2019 
 2020     // <normal>
 2021     // cqto
 2022     __ bind(normal);
 2023     __ cdqq();
 2024 
 2025     // idivq (note: must be emitted by the user of this rule)
 2026     // <done>
 2027     __ idivq($div$$Register);
 2028     __ bind(done);
 2029   %}
 2030 
 2031   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
 2032   enc_class OpcSE(immI imm)
 2033   %{
 2034     // Emit primary opcode and set sign-extend bit
 2035     // Check for 8-bit immediate, and set sign extend bit in opcode
 2036     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2037       emit_opcode(cbuf, $primary | 0x02);
 2038     } else {
 2039       // 32-bit immediate
 2040       emit_opcode(cbuf, $primary);
 2041     }
 2042   %}
 2043 
 2044   enc_class OpcSErm(rRegI dst, immI imm)
 2045   %{
 2046     // OpcSEr/m
 2047     int dstenc = $dst$$reg;
 2048     if (dstenc >= 8) {
 2049       emit_opcode(cbuf, Assembler::REX_B);
 2050       dstenc -= 8;
 2051     }
 2052     // Emit primary opcode and set sign-extend bit
 2053     // Check for 8-bit immediate, and set sign extend bit in opcode
 2054     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2055       emit_opcode(cbuf, $primary | 0x02);
 2056     } else {
 2057       // 32-bit immediate
 2058       emit_opcode(cbuf, $primary);
 2059     }
 2060     // Emit r/m byte with secondary opcode, after primary opcode.
 2061     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2062   %}
 2063 
 2064   enc_class OpcSErm_wide(rRegL dst, immI imm)
 2065   %{
 2066     // OpcSEr/m
 2067     int dstenc = $dst$$reg;
 2068     if (dstenc < 8) {
 2069       emit_opcode(cbuf, Assembler::REX_W);
 2070     } else {
 2071       emit_opcode(cbuf, Assembler::REX_WB);
 2072       dstenc -= 8;
 2073     }
 2074     // Emit primary opcode and set sign-extend bit
 2075     // Check for 8-bit immediate, and set sign extend bit in opcode
 2076     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2077       emit_opcode(cbuf, $primary | 0x02);
 2078     } else {
 2079       // 32-bit immediate
 2080       emit_opcode(cbuf, $primary);
 2081     }
 2082     // Emit r/m byte with secondary opcode, after primary opcode.
 2083     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2084   %}
 2085 
 2086   enc_class Con8or32(immI imm)
 2087   %{
 2088     // Check for 8-bit immediate, and set sign extend bit in opcode
 2089     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2090       $$$emit8$imm$$constant;
 2091     } else {
 2092       // 32-bit immediate
 2093       $$$emit32$imm$$constant;
 2094     }
 2095   %}
 2096 
 2097   enc_class opc2_reg(rRegI dst)
 2098   %{
 2099     // BSWAP
 2100     emit_cc(cbuf, $secondary, $dst$$reg);
 2101   %}
 2102 
 2103   enc_class opc3_reg(rRegI dst)
 2104   %{
 2105     // BSWAP
 2106     emit_cc(cbuf, $tertiary, $dst$$reg);
 2107   %}
 2108 
 2109   enc_class reg_opc(rRegI div)
 2110   %{
 2111     // INC, DEC, IDIV, IMOD, JMP indirect, ...
 2112     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
 2113   %}
 2114 
 2115   enc_class enc_cmov(cmpOp cop)
 2116   %{
 2117     // CMOV
 2118     $$$emit8$primary;
 2119     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 2120   %}
 2121 
 2122   enc_class enc_PartialSubtypeCheck()
 2123   %{
 2124     Register Rrdi = as_Register(RDI_enc); // result register
 2125     Register Rrax = as_Register(RAX_enc); // super class
 2126     Register Rrcx = as_Register(RCX_enc); // killed
 2127     Register Rrsi = as_Register(RSI_enc); // sub class
 2128     Label miss;
 2129     const bool set_cond_codes = true;
 2130 
 2131     MacroAssembler _masm(&cbuf);
 2132     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 2133                                      NULL, &miss,
 2134                                      /*set_cond_codes:*/ true);
 2135     if ($primary) {
 2136       __ xorptr(Rrdi, Rrdi);
 2137     }
 2138     __ bind(miss);
 2139   %}
 2140 
 2141   enc_class clear_avx %{
 2142     debug_only(int off0 = cbuf.insts_size());
 2143     if (generate_vzeroupper(Compile::current())) {
 2144       // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
 2145       // Clear upper bits of YMM registers when current compiled code uses
 2146       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 2147       MacroAssembler _masm(&cbuf);
 2148       __ vzeroupper();
 2149     }
 2150     debug_only(int off1 = cbuf.insts_size());
 2151     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 2152   %}
 2153 
 2154   enc_class Java_To_Runtime(method meth) %{
 2155     // No relocation needed
 2156     MacroAssembler _masm(&cbuf);
 2157     __ mov64(r10, (int64_t) $meth$$method);
 2158     __ call(r10);
 2159     __ post_call_nop();
 2160   %}
 2161 
 2162   enc_class Java_Static_Call(method meth)
 2163   %{
 2164     // JAVA STATIC CALL
 2165     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 2166     // determine who we intended to call.
 2167     MacroAssembler _masm(&cbuf);
 2168     cbuf.set_insts_mark();
 2169 
 2170     if (!_method) {
 2171       $$$emit8$primary;
 2172       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
 2173                      runtime_call_Relocation::spec(),
 2174                      RELOC_DISP32);
 2175     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 2176       // The NOP here is purely to ensure that eliding a call to
 2177       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 2178       __ addr_nop_5();
 2179       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 2180     } else {
 2181       $$$emit8$primary;
 2182       int method_index = resolved_method_index(cbuf);
 2183       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 2184                                                   : static_call_Relocation::spec(method_index);
 2185       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
 2186                      rspec, RELOC_DISP32);
 2187       address mark = cbuf.insts_mark();
 2188       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 2189         // Calls of the same statically bound method can share
 2190         // a stub to the interpreter.
 2191         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 2192       } else {
 2193         // Emit stubs for static call.
 2194         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 2195         if (stub == NULL) {
 2196           ciEnv::current()->record_failure("CodeCache is full");
 2197           return;
 2198         }
 2199       }
 2200     }
 2201     _masm.clear_inst_mark();
 2202     __ post_call_nop();
 2203   %}
 2204 
 2205   enc_class Java_Dynamic_Call(method meth) %{
 2206     MacroAssembler _masm(&cbuf);
 2207     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 2208     __ post_call_nop();
 2209   %}
 2210 
 2211   enc_class reg_opc_imm(rRegI dst, immI8 shift)
 2212   %{
 2213     // SAL, SAR, SHR
 2214     int dstenc = $dst$$reg;
 2215     if (dstenc >= 8) {
 2216       emit_opcode(cbuf, Assembler::REX_B);
 2217       dstenc -= 8;
 2218     }
 2219     $$$emit8$primary;
 2220     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2221     $$$emit8$shift$$constant;
 2222   %}
 2223 
 2224   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
 2225   %{
 2226     // SAL, SAR, SHR
 2227     int dstenc = $dst$$reg;
 2228     if (dstenc < 8) {
 2229       emit_opcode(cbuf, Assembler::REX_W);
 2230     } else {
 2231       emit_opcode(cbuf, Assembler::REX_WB);
 2232       dstenc -= 8;
 2233     }
 2234     $$$emit8$primary;
 2235     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2236     $$$emit8$shift$$constant;
 2237   %}
 2238 
 2239   enc_class load_immI(rRegI dst, immI src)
 2240   %{
 2241     int dstenc = $dst$$reg;
 2242     if (dstenc >= 8) {
 2243       emit_opcode(cbuf, Assembler::REX_B);
 2244       dstenc -= 8;
 2245     }
 2246     emit_opcode(cbuf, 0xB8 | dstenc);
 2247     $$$emit32$src$$constant;
 2248   %}
 2249 
 2250   enc_class load_immL(rRegL dst, immL src)
 2251   %{
 2252     int dstenc = $dst$$reg;
 2253     if (dstenc < 8) {
 2254       emit_opcode(cbuf, Assembler::REX_W);
 2255     } else {
 2256       emit_opcode(cbuf, Assembler::REX_WB);
 2257       dstenc -= 8;
 2258     }
 2259     emit_opcode(cbuf, 0xB8 | dstenc);
 2260     emit_d64(cbuf, $src$$constant);
 2261   %}
 2262 
 2263   enc_class load_immUL32(rRegL dst, immUL32 src)
 2264   %{
 2265     // same as load_immI, but this time we care about zeroes in the high word
 2266     int dstenc = $dst$$reg;
 2267     if (dstenc >= 8) {
 2268       emit_opcode(cbuf, Assembler::REX_B);
 2269       dstenc -= 8;
 2270     }
 2271     emit_opcode(cbuf, 0xB8 | dstenc);
 2272     $$$emit32$src$$constant;
 2273   %}
 2274 
 2275   enc_class load_immL32(rRegL dst, immL32 src)
 2276   %{
 2277     int dstenc = $dst$$reg;
 2278     if (dstenc < 8) {
 2279       emit_opcode(cbuf, Assembler::REX_W);
 2280     } else {
 2281       emit_opcode(cbuf, Assembler::REX_WB);
 2282       dstenc -= 8;
 2283     }
 2284     emit_opcode(cbuf, 0xC7);
 2285     emit_rm(cbuf, 0x03, 0x00, dstenc);
 2286     $$$emit32$src$$constant;
 2287   %}
 2288 
 2289   enc_class load_immP31(rRegP dst, immP32 src)
 2290   %{
 2291     // same as load_immI, but this time we care about zeroes in the high word
 2292     int dstenc = $dst$$reg;
 2293     if (dstenc >= 8) {
 2294       emit_opcode(cbuf, Assembler::REX_B);
 2295       dstenc -= 8;
 2296     }
 2297     emit_opcode(cbuf, 0xB8 | dstenc);
 2298     $$$emit32$src$$constant;
 2299   %}
 2300 
 2301   enc_class load_immP(rRegP dst, immP src)
 2302   %{
 2303     int dstenc = $dst$$reg;
 2304     if (dstenc < 8) {
 2305       emit_opcode(cbuf, Assembler::REX_W);
 2306     } else {
 2307       emit_opcode(cbuf, Assembler::REX_WB);
 2308       dstenc -= 8;
 2309     }
 2310     emit_opcode(cbuf, 0xB8 | dstenc);
 2311     // This next line should be generated from ADLC
 2312     if ($src->constant_reloc() != relocInfo::none) {
 2313       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
 2314     } else {
 2315       emit_d64(cbuf, $src$$constant);
 2316     }
 2317   %}
 2318 
 2319   enc_class Con32(immI src)
 2320   %{
 2321     // Output immediate
 2322     $$$emit32$src$$constant;
 2323   %}
 2324 
 2325   enc_class Con32F_as_bits(immF src)
 2326   %{
 2327     // Output Float immediate bits
 2328     jfloat jf = $src$$constant;
 2329     jint jf_as_bits = jint_cast(jf);
 2330     emit_d32(cbuf, jf_as_bits);
 2331   %}
 2332 
 2333   enc_class Con16(immI src)
 2334   %{
 2335     // Output immediate
 2336     $$$emit16$src$$constant;
 2337   %}
 2338 
 2339   // How is this different from Con32??? XXX
 2340   enc_class Con_d32(immI src)
 2341   %{
 2342     emit_d32(cbuf,$src$$constant);
 2343   %}
 2344 
 2345   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
 2346     // Output immediate memory reference
 2347     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2348     emit_d32(cbuf, 0x00);
 2349   %}
 2350 
 2351   enc_class lock_prefix()
 2352   %{
 2353     emit_opcode(cbuf, 0xF0); // lock
 2354   %}
 2355 
 2356   enc_class REX_mem(memory mem)
 2357   %{
 2358     if ($mem$$base >= 8) {
 2359       if ($mem$$index < 8) {
 2360         emit_opcode(cbuf, Assembler::REX_B);
 2361       } else {
 2362         emit_opcode(cbuf, Assembler::REX_XB);
 2363       }
 2364     } else {
 2365       if ($mem$$index >= 8) {
 2366         emit_opcode(cbuf, Assembler::REX_X);
 2367       }
 2368     }
 2369   %}
 2370 
 2371   enc_class REX_mem_wide(memory mem)
 2372   %{
 2373     if ($mem$$base >= 8) {
 2374       if ($mem$$index < 8) {
 2375         emit_opcode(cbuf, Assembler::REX_WB);
 2376       } else {
 2377         emit_opcode(cbuf, Assembler::REX_WXB);
 2378       }
 2379     } else {
 2380       if ($mem$$index < 8) {
 2381         emit_opcode(cbuf, Assembler::REX_W);
 2382       } else {
 2383         emit_opcode(cbuf, Assembler::REX_WX);
 2384       }
 2385     }
 2386   %}
 2387 
 2388   // for byte regs
 2389   enc_class REX_breg(rRegI reg)
 2390   %{
 2391     if ($reg$$reg >= 4) {
 2392       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2393     }
 2394   %}
 2395 
 2396   // for byte regs
 2397   enc_class REX_reg_breg(rRegI dst, rRegI src)
 2398   %{
 2399     if ($dst$$reg < 8) {
 2400       if ($src$$reg >= 4) {
 2401         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2402       }
 2403     } else {
 2404       if ($src$$reg < 8) {
 2405         emit_opcode(cbuf, Assembler::REX_R);
 2406       } else {
 2407         emit_opcode(cbuf, Assembler::REX_RB);
 2408       }
 2409     }
 2410   %}
 2411 
 2412   // for byte regs
 2413   enc_class REX_breg_mem(rRegI reg, memory mem)
 2414   %{
 2415     if ($reg$$reg < 8) {
 2416       if ($mem$$base < 8) {
 2417         if ($mem$$index >= 8) {
 2418           emit_opcode(cbuf, Assembler::REX_X);
 2419         } else if ($reg$$reg >= 4) {
 2420           emit_opcode(cbuf, Assembler::REX);
 2421         }
 2422       } else {
 2423         if ($mem$$index < 8) {
 2424           emit_opcode(cbuf, Assembler::REX_B);
 2425         } else {
 2426           emit_opcode(cbuf, Assembler::REX_XB);
 2427         }
 2428       }
 2429     } else {
 2430       if ($mem$$base < 8) {
 2431         if ($mem$$index < 8) {
 2432           emit_opcode(cbuf, Assembler::REX_R);
 2433         } else {
 2434           emit_opcode(cbuf, Assembler::REX_RX);
 2435         }
 2436       } else {
 2437         if ($mem$$index < 8) {
 2438           emit_opcode(cbuf, Assembler::REX_RB);
 2439         } else {
 2440           emit_opcode(cbuf, Assembler::REX_RXB);
 2441         }
 2442       }
 2443     }
 2444   %}
 2445 
 2446   enc_class REX_reg(rRegI reg)
 2447   %{
 2448     if ($reg$$reg >= 8) {
 2449       emit_opcode(cbuf, Assembler::REX_B);
 2450     }
 2451   %}
 2452 
 2453   enc_class REX_reg_wide(rRegI reg)
 2454   %{
 2455     if ($reg$$reg < 8) {
 2456       emit_opcode(cbuf, Assembler::REX_W);
 2457     } else {
 2458       emit_opcode(cbuf, Assembler::REX_WB);
 2459     }
 2460   %}
 2461 
 2462   enc_class REX_reg_reg(rRegI dst, rRegI src)
 2463   %{
 2464     if ($dst$$reg < 8) {
 2465       if ($src$$reg >= 8) {
 2466         emit_opcode(cbuf, Assembler::REX_B);
 2467       }
 2468     } else {
 2469       if ($src$$reg < 8) {
 2470         emit_opcode(cbuf, Assembler::REX_R);
 2471       } else {
 2472         emit_opcode(cbuf, Assembler::REX_RB);
 2473       }
 2474     }
 2475   %}
 2476 
 2477   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
 2478   %{
 2479     if ($dst$$reg < 8) {
 2480       if ($src$$reg < 8) {
 2481         emit_opcode(cbuf, Assembler::REX_W);
 2482       } else {
 2483         emit_opcode(cbuf, Assembler::REX_WB);
 2484       }
 2485     } else {
 2486       if ($src$$reg < 8) {
 2487         emit_opcode(cbuf, Assembler::REX_WR);
 2488       } else {
 2489         emit_opcode(cbuf, Assembler::REX_WRB);
 2490       }
 2491     }
 2492   %}
 2493 
 2494   enc_class REX_reg_mem(rRegI reg, memory mem)
 2495   %{
          // Emit the REX prefix, if one is needed, for a reg-mem form that does
          // not use the W variants.  Three encodings are each tested against 8:
          // the register selects the R part, the memory base the B part and
          // the memory index the X part.  Nothing is emitted when all three
          // are below 8.
 2496     if ($reg$$reg < 8) {
 2497       if ($mem$$base < 8) {
 2498         if ($mem$$index >= 8) {
 2499           emit_opcode(cbuf, Assembler::REX_X);
 2500         }
 2501       } else {
 2502         if ($mem$$index < 8) {
 2503           emit_opcode(cbuf, Assembler::REX_B);
 2504         } else {
 2505           emit_opcode(cbuf, Assembler::REX_XB);
 2506         }
 2507       }
 2508     } else {
 2509       if ($mem$$base < 8) {
 2510         if ($mem$$index < 8) {
 2511           emit_opcode(cbuf, Assembler::REX_R);
 2512         } else {
 2513           emit_opcode(cbuf, Assembler::REX_RX);
 2514         }
 2515       } else {
 2516         if ($mem$$index < 8) {
 2517           emit_opcode(cbuf, Assembler::REX_RB);
 2518         } else {
 2519           emit_opcode(cbuf, Assembler::REX_RXB);
 2520         }
 2521       }
 2522     }
 2523   %}
 2524 
 2525   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
 2526   %{
          // Wide counterpart of REX_reg_mem: exactly one prefix byte is always
          // emitted, from REX_W up to REX_WRXB.  The R part follows the
          // register encoding, the B part the memory base and the X part the
          // memory index, each being added when the encoding is 8 or more.
 2527     if ($reg$$reg < 8) {
 2528       if ($mem$$base < 8) {
 2529         if ($mem$$index < 8) {
 2530           emit_opcode(cbuf, Assembler::REX_W);
 2531         } else {
 2532           emit_opcode(cbuf, Assembler::REX_WX);
 2533         }
 2534       } else {
 2535         if ($mem$$index < 8) {
 2536           emit_opcode(cbuf, Assembler::REX_WB);
 2537         } else {
 2538           emit_opcode(cbuf, Assembler::REX_WXB);
 2539         }
 2540       }
 2541     } else {
 2542       if ($mem$$base < 8) {
 2543         if ($mem$$index < 8) {
 2544           emit_opcode(cbuf, Assembler::REX_WR);
 2545         } else {
 2546           emit_opcode(cbuf, Assembler::REX_WRX);
 2547         }
 2548       } else {
 2549         if ($mem$$index < 8) {
 2550           emit_opcode(cbuf, Assembler::REX_WRB);
 2551         } else {
 2552           emit_opcode(cbuf, Assembler::REX_WRXB);
 2553         }
 2554       }
 2555     }
 2556   %}
 2557 
 2558   enc_class reg_mem(rRegI ereg, memory mem)
 2559   %{
          // Unpack the register encoding and the base, index, scale and
          // displacement of the memory operand, then pass them together with
          // the displacement relocation type to encode_RegMem.
 2560     // High registers are handled in encode_RegMem
 2561     int reg = $ereg$$reg;
 2562     int base = $mem$$base;
 2563     int index = $mem$$index;
 2564     int scale = $mem$$scale;
 2565     int disp = $mem$$disp;
 2566     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2567
 2568     encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
 2569   %}
 2570 
 2571   enc_class RM_opc_mem(immI rm_opcode, memory mem)
 2572   %{
          // Same call as reg_mem, except that the constant rm_opcode is passed
          // to encode_RegMem in the position where reg_mem passes a register
          // encoding.
 2573     int rm_byte_opcode = $rm_opcode$$constant;
 2574
 2575     // High registers are handled in encode_RegMem
 2576     int base = $mem$$base;
 2577     int index = $mem$$index;
 2578     int scale = $mem$$scale;
 2579     int displace = $mem$$disp;
 2580
 2581     relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
 2582                                             // working with static
 2583                                             // globals
 2584     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
 2585                   disp_reloc);
 2586   %}
 2587 
 2588   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
 2589   %{
          // Call encode_RegMem for dst with an address made of base src0 plus
          // the constant displacement src1.  The index and scale are fixed to
          // the values that the comments below describe as "none", and the
          // displacement carries no relocation.
 2590     int reg_encoding = $dst$$reg;
 2591     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2592     int index        = 0x04;            // 0x04 indicates no index
 2593     int scale        = 0x00;            // 0x00 indicates no scale
 2594     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2595     relocInfo::relocType disp_reloc = relocInfo::none;
 2596     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
 2597                   disp_reloc);
 2598   %}
 2599 
 2600   enc_class neg_reg(rRegI dst)
 2601   %{
          // Emit NEG on dst without a W prefix.  Encodings of 8 or more get a
          // REX_B prefix and are reduced by 8 before being passed to emit_rm;
          // lower encodings get no prefix.
 2602     int dstenc = $dst$$reg;
 2603     if (dstenc >= 8) {
 2604       emit_opcode(cbuf, Assembler::REX_B);
 2605       dstenc -= 8;
 2606     }
 2607     // NEG $dst
 2608     emit_opcode(cbuf, 0xF7);
 2609     emit_rm(cbuf, 0x3, 0x03, dstenc);
 2610   %}
 2611 
 2612   enc_class neg_reg_wide(rRegI dst)
 2613   %{
          // Wide counterpart of neg_reg: a prefix is always emitted, REX_W for
          // encodings below 8 and REX_WB otherwise (the encoding is then
          // reduced by 8).  The opcode and emit_rm arguments match neg_reg.
 2614     int dstenc = $dst$$reg;
 2615     if (dstenc < 8) {
 2616       emit_opcode(cbuf, Assembler::REX_W);
 2617     } else {
 2618       emit_opcode(cbuf, Assembler::REX_WB);
 2619       dstenc -= 8;
 2620     }
 2621     // NEG $dst
 2622     emit_opcode(cbuf, 0xF7);
 2623     emit_rm(cbuf, 0x3, 0x03, dstenc);
 2624   %}
 2625 
 2626   enc_class setLT_reg(rRegI dst)
 2627   %{
          // Emit SETLT (bytes 0x0F 0x9C) on dst.  Prefix selection:
          //   encoding 8 or more : REX_B, and the encoding is reduced by 8
          //   encoding 4 to 7    : plain REX
          //   encoding below 4   : no prefix
          // NOTE(review): the plain REX for 4 to 7 is presumably what makes
          // those encodings name the low byte of the register; confirm against
          // the ISA manual.
 2628     int dstenc = $dst$$reg;
 2629     if (dstenc >= 8) {
 2630       emit_opcode(cbuf, Assembler::REX_B);
 2631       dstenc -= 8;
 2632     } else if (dstenc >= 4) {
 2633       emit_opcode(cbuf, Assembler::REX);
 2634     }
 2635     // SETLT $dst
 2636     emit_opcode(cbuf, 0x0F);
 2637     emit_opcode(cbuf, 0x9C);
 2638     emit_rm(cbuf, 0x3, 0x0, dstenc);
 2639   %}
 2640 
 2641   enc_class setNZ_reg(rRegI dst)
 2642   %{
          // Emit SETNZ (bytes 0x0F 0x95) on dst.  The prefix selection is the
          // same as in setLT_reg: REX_B for encodings of 8 or more, plain REX
          // for encodings 4 to 7, no prefix below 4.
 2643     int dstenc = $dst$$reg;
 2644     if (dstenc >= 8) {
 2645       emit_opcode(cbuf, Assembler::REX_B);
 2646       dstenc -= 8;
 2647     } else if (dstenc >= 4) {
 2648       emit_opcode(cbuf, Assembler::REX);
 2649     }
 2650     // SETNZ $dst
 2651     emit_opcode(cbuf, 0x0F);
 2652     emit_opcode(cbuf, 0x95);
 2653     emit_rm(cbuf, 0x3, 0x0, dstenc);
 2654   %}
 2655 
 2656 
 2657   // Compare the longs and set -1, 0, or 1 into dst
 2658   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
 2659   %{
          // Emitted sequence: cmpq, movl dst with -1, a short jl over the rest,
          // then setne and movzbl on dst.
 2660     int src1enc = $src1$$reg;
 2661     int src2enc = $src2$$reg;
 2662     int dstenc = $dst$$reg;
 2663
 2664     // cmpq $src1, $src2
 2665     if (src1enc < 8) {
 2666       if (src2enc < 8) {
 2667         emit_opcode(cbuf, Assembler::REX_W);
 2668       } else {
 2669         emit_opcode(cbuf, Assembler::REX_WB);
 2670       }
 2671     } else {
 2672       if (src2enc < 8) {
 2673         emit_opcode(cbuf, Assembler::REX_WR);
 2674       } else {
 2675         emit_opcode(cbuf, Assembler::REX_WRB);
 2676       }
 2677     }
 2678     emit_opcode(cbuf, 0x3B);
 2679     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
 2680
 2681     // movl $dst, -1
 2682     if (dstenc >= 8) {
 2683       emit_opcode(cbuf, Assembler::REX_B);
 2684     }
 2685     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
 2686     emit_d32(cbuf, -1);
 2687
 2688     // jl,s done
          // The displacement is the size of the two instructions emitted below:
          // three bytes each, plus one prefix byte each when dstenc is 4 or more.
 2689     emit_opcode(cbuf, 0x7C);
 2690     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
 2691
 2692     // setne $dst
 2693     if (dstenc >= 4) {
 2694       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
 2695     }
 2696     emit_opcode(cbuf, 0x0F);
 2697     emit_opcode(cbuf, 0x95);
 2698     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
 2699
 2700     // movzbl $dst, $dst
          // dst is passed to emit_rm twice, so the high-register prefix is REX_RB.
 2701     if (dstenc >= 4) {
 2702       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
 2703     }
 2704     emit_opcode(cbuf, 0x0F);
 2705     emit_opcode(cbuf, 0xB6);
 2706     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
 2707   %}
 2708 
 2709   enc_class Push_ResultXD(regD dst) %{
          // fstp_d to the slot at rsp, movdbl from that slot into dst, then
          // add 8 to rsp.
 2710     MacroAssembler _masm(&cbuf);
 2711     __ fstp_d(Address(rsp, 0));
 2712     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2713     __ addptr(rsp, 8);
 2714   %}
 2715 
 2716   enc_class Push_SrcXD(regD src) %{
          // Subtract 8 from rsp, movdbl src into the new slot at rsp, then
          // fld_d from that slot.  rsp is left adjusted; Push_ResultXD adds the
          // 8 bytes back.
 2717     MacroAssembler _masm(&cbuf);
 2718     __ subptr(rsp, 8);
 2719     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2720     __ fld_d(Address(rsp, 0));
 2721   %}
 2722 
 2723 
 2724   enc_class enc_rethrow()
 2725   %{
          // Emit opcode 0xE9 followed by a relocated 32-bit displacement to
          // OptoRuntime::rethrow_stub().  The displacement is computed from
          // the current end of instructions; the extra 4 presumably accounts
          // for the displacement bytes that are still to be emitted.
 2726     cbuf.set_insts_mark();
 2727     emit_opcode(cbuf, 0xE9); // jmp entry
 2728     emit_d32_reloc(cbuf,
 2729                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
 2730                    runtime_call_Relocation::spec(),
 2731                    RELOC_DISP32);
 2732   %}
 2733 
 2734 %}
 2735 
 2736 
 2737 
 2738 //----------FRAME--------------------------------------------------------------
 2739 // Definition of frame structure and management information.
 2740 //
 2741 //  S T A C K   L A Y O U T    Allocators stack-slot number
 2742 //                             |   (to get allocators register number
 2743 //  G  Owned by    |        |  v    add OptoReg::stack0())
 2744 //  r   CALLER     |        |
 2745 //  o     |        +--------+      pad to even-align allocators stack-slot
 2746 //  w     V        |  pad0  |        numbers; owned by CALLER
 2747 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 2748 //  h     ^        |   in   |  5
 2749 //        |        |  args  |  4   Holes in incoming args owned by SELF
 2750 //  |     |        |        |  3
 2751 //  |     |        +--------+
 2752 //  V     |        | old out|      Empty on Intel, window on Sparc
 2753 //        |    old |preserve|      Must be even aligned.
 2754 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 2755 //        |        |   in   |  3   area for Intel ret address
 2756 //     Owned by    |preserve|      Empty on Sparc.
 2757 //       SELF      +--------+
 2758 //        |        |  pad2  |  2   pad to align old SP
 2759 //        |        +--------+  1
 2760 //        |        | locks  |  0
 2761 //        |        +--------+----> OptoReg::stack0(), even aligned
 2762 //        |        |  pad1  | 11   pad to align new SP
 2763 //        |        +--------+
 2764 //        |        |        | 10
 2765 //        |        | spills |  9   spills
 2766 //        V        |        |  8   (pad0 slot for callee)
 2767 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 2768 //        ^        |  out   |  7
 2769 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 2770 //     Owned by    +--------+
 2771 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 2772 //        |    new |preserve|      Must be even-aligned.
 2773 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 2774 //        |        |        |
 2775 //
 2776 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 2777 //         known from SELF's arguments and the Java calling convention.
 2778 //         Region 6-7 is determined per call site.
 2779 // Note 2: If the calling convention leaves holes in the incoming argument
 2780 //         area, those holes are owned by SELF.  Holes in the outgoing area
 2781 //         are owned by the CALLEE.  Holes should not be necessary in the
 2782 //         incoming area, as the Java calling convention is completely under
 2783 //         the control of the AD file.  Doubles can be sorted and packed to
 2784 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 2785 //         varargs C calling conventions.
 2786 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 2787 //         even aligned with pad0 as needed.
 2788 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 2789 //         region 6-11 is even aligned; it may be padded out more so that
 2790 //         the region from SP to FP meets the minimum stack alignment.
 2791 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 2792 //         alignment.  Region 11, pad1, may be dynamically extended so that
 2793 //         SP meets the minimum alignment.
 2794 
 2795 frame
 2796 %{
 2797   // This register is part of the calling convention
 2798   // between compiled code and the interpreter.
 2799   inline_cache_reg(RAX);                // Inline Cache Register
 2800
 2801   // Optional: name the operand used by cisc-spilling to access
 2802   // [stack_pointer + offset]
 2803   cisc_spilling_operand_name(indOffset32);
 2804
 2805   // Number of stack slots consumed by locking an object
 2806   sync_stack_slots(2);
 2807
 2808   // Compiled code's Frame Pointer
 2809   frame_pointer(RSP);
 2810
 2811   // Interpreter stores its frame pointer in a register which is
 2812   // stored to the stack by I2CAdaptors.
 2813   // I2CAdaptors convert from interpreted java to compiled java.
 2814   interpreter_frame_pointer(RBP);
 2815
 2816   // Stack alignment requirement
 2817   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 2818
 2819   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 2820   // for calls to C.  Supports the var-args backing area for register parms.
 2821   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 2822
 2823   // The after-PROLOG location of the return address.  Location of
 2824   // return address specifies a type (REG or STACK) and a number
 2825   // representing the register number (i.e. - use a register name) or
 2826   // stack slot.
 2827   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 2828   // Otherwise, it is above the locks and verification slot and alignment word
 2829   return_addr(STACK - 2 +
 2830               align_up((Compile::current()->in_preserve_stack_slots() +
 2831                         Compile::current()->fixed_slots()),
 2832                        stack_alignment_in_slots()));
 2833
 2834   // Location of compiled Java return values.  Same as C for now.
 2835   // Both tables below are indexed by ideal_reg; lo and hi together form
 2836   // the OptoRegPair that is returned.
 2836   return_value
 2836   %{
 2837     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 2838            "only return normal values");
 2839
 2840     static const int lo[Op_RegL + 1] = {
 2841       0,
 2842       0,
 2843       RAX_num,  // Op_RegN
 2844       RAX_num,  // Op_RegI
 2845       RAX_num,  // Op_RegP
 2846       XMM0_num, // Op_RegF
 2847       XMM0_num, // Op_RegD
 2848       RAX_num   // Op_RegL
 2849     };
 2850     static const int hi[Op_RegL + 1] = {
 2851       0,
 2852       0,
 2853       OptoReg::Bad, // Op_RegN
 2854       OptoReg::Bad, // Op_RegI
 2855       RAX_H_num,    // Op_RegP
 2856       OptoReg::Bad, // Op_RegF
 2857       XMM0b_num,    // Op_RegD
 2858       RAX_H_num     // Op_RegL
 2859     };
 2860     // Excluded flags and vector registers.
 2861     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 2862     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 2863   %}
 2864 %}
 2865 
 2866 //----------ATTRIBUTES---------------------------------------------------------
 2867 //----------Operand Attributes-------------------------------------------------
 2868 op_attrib op_cost(0);        // Required cost attribute; operands below set their own value with op_cost(n)
 2869
 2870 //----------Instruction Attributes---------------------------------------------
 2871 ins_attrib ins_cost(100);       // Required cost attribute
 2872 ins_attrib ins_size(8);         // Required size attribute (in bits)
 2873 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 2874                                 // a non-matching short branch variant
 2875                                 // of some long branch?
 2876 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 2877                                 // be a power of 2) specifies the
 2878                                 // alignment that some part of the
 2879                                 // instruction (not necessarily the
 2880                                 // start) requires.  If > 1, a
 2881                                 // compute_padding() function must be
 2882                                 // provided for the instruction
 2883 
 2884 //----------OPERANDS-----------------------------------------------------------
 2885 // Operand definitions must precede instruction definitions for correct parsing
 2886 // in the ADLC because operands constitute user defined types which are used in
 2887 // instruction definitions.
 2888 
 2889 //----------Simple Operands----------------------------------------------------
 2890 // Immediate Operands
 2891 // Integer Immediate: any ConI constant, no restriction on the value
 2892 operand immI()
 2893 %{
 2894   match(ConI);
 2895
 2896   op_cost(10);
 2897   format %{ %}
 2898   interface(CONST_INTER);
 2899 %}
 2900 
 2901 // Constant for test vs zero: int constant equal to 0
 2902 operand immI_0()
 2903 %{
 2904   predicate(n->get_int() == 0);
 2905   match(ConI);
 2906
 2907   op_cost(0);
 2908   format %{ %}
 2909   interface(CONST_INTER);
 2910 %}
 2911 
 2912 // Constant for increment: int constant equal to 1
 2913 operand immI_1()
 2914 %{
 2915   predicate(n->get_int() == 1);
 2916   match(ConI);
 2917
 2918   op_cost(0);
 2919   format %{ %}
 2920   interface(CONST_INTER);
 2921 %}
 2922 
 2923 // Constant for decrement: int constant equal to -1
 2924 operand immI_M1()
 2925 %{
 2926   predicate(n->get_int() == -1);
 2927   match(ConI);
 2928
 2929   op_cost(0);
 2930   format %{ %}
 2931   interface(CONST_INTER);
 2932 %}
 2933 
 2934 operand immI_2()  // Int constant equal to 2
 2935 %{
 2936   predicate(n->get_int() == 2);
 2937   match(ConI);
 2938
 2939   op_cost(0);
 2940   format %{ %}
 2941   interface(CONST_INTER);
 2942 %}
 2943 
 2944 operand immI_4()  // Int constant equal to 4
 2945 %{
 2946   predicate(n->get_int() == 4);
 2947   match(ConI);
 2948
 2949   op_cost(0);
 2950   format %{ %}
 2951   interface(CONST_INTER);
 2952 %}
 2953 
 2954 operand immI_8()  // Int constant equal to 8
 2955 %{
 2956   predicate(n->get_int() == 8);
 2957   match(ConI);
 2958
 2959   op_cost(0);
 2960   format %{ %}
 2961   interface(CONST_INTER);
 2962 %}
 2963 
 2964 // Valid scale values for addressing modes: int constant in 0..3 inclusive
 2965 operand immI2()
 2966 %{
 2967   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 2968   match(ConI);
 2969
 2970   format %{ %}  // no op_cost is given for this operand
 2971   interface(CONST_INTER);
 2972 %}
 2973 
 2974 operand immU7()  // Int constant in 0..0x7F inclusive (fits in 7 unsigned bits)
 2975 %{
 2976   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 2977   match(ConI);
 2978
 2979   op_cost(5);
 2980   format %{ %}
 2981   interface(CONST_INTER);
 2982 %}
 2983 
 2984 operand immI8()  // Int constant in -0x80..0x7F inclusive (fits in a signed byte)
 2985 %{
 2986   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 2987   match(ConI);
 2988
 2989   op_cost(5);
 2990   format %{ %}
 2991   interface(CONST_INTER);
 2992 %}
 2993 
 2994 operand immU8()  // Int constant in 0..255 inclusive (fits in an unsigned byte)
 2995 %{
 2996   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 2997   match(ConI);
 2998
 2999   op_cost(5);
 3000   format %{ %}
 3001   interface(CONST_INTER);
 3002 %}
 3003 
 3004 operand immI16()  // Int constant in -32768..32767 inclusive (fits in a signed 16-bit value)
 3005 %{
 3006   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3007   match(ConI);
 3008
 3009   op_cost(10);
 3010   format %{ %}
 3011   interface(CONST_INTER);
 3012 %}
 3013 
 3014 // Int Immediate non-negative: int constant that is zero or greater
 3015 operand immU31()
 3016 %{
 3017   predicate(n->get_int() >= 0);
 3018   match(ConI);
 3019
 3020   op_cost(0);
 3021   format %{ %}
 3022   interface(CONST_INTER);
 3023 %}
 3024 
 3025 // Constant for long shifts: int constant equal to 32
 3026 operand immI_32()
 3027 %{
 3028   predicate( n->get_int() == 32 );
 3029   match(ConI);
 3030
 3031   op_cost(0);
 3032   format %{ %}
 3033   interface(CONST_INTER);
 3034 %}
 3035 
 3036 // Constant for long shifts: int constant equal to 64
 3037 operand immI_64()
 3038 %{
 3039   predicate( n->get_int() == 64 );
 3040   match(ConI);
 3041
 3042   op_cost(0);
 3043   format %{ %}
 3044   interface(CONST_INTER);
 3045 %}
 3046 
 3047 // Pointer Immediate: any ConP constant, no restriction on the value
 3048 operand immP()
 3049 %{
 3050   match(ConP);
 3051
 3052   op_cost(10);
 3053   format %{ %}
 3054   interface(CONST_INTER);
 3055 %}
 3056 
 3057 // NULL Pointer Immediate: pointer constant whose value is 0
 3058 operand immP0()
 3059 %{
 3060   predicate(n->get_ptr() == 0);
 3061   match(ConP);
 3062
 3063   op_cost(5);
 3064   format %{ %}
 3065   interface(CONST_INTER);
 3066 %}
 3067 
 3068 // Narrow Pointer Immediate: any ConN constant, no restriction on the value
 3069 operand immN() %{
 3070   match(ConN);
 3071
 3072   op_cost(10);
 3073   format %{ %}
 3074   interface(CONST_INTER);
 3075 %}
 3076 
 3077 operand immNKlass() %{  // Any ConNKlass constant, no restriction on the value
 3078   match(ConNKlass);
 3079
 3080   op_cost(10);
 3081   format %{ %}
 3082   interface(CONST_INTER);
 3083 %}
 3084 
 3085 // Narrow NULL Pointer Immediate: ConN constant whose narrow value is 0
 3086 operand immN0() %{
 3087   predicate(n->get_narrowcon() == 0);
 3088   match(ConN);
 3089
 3090   op_cost(5);
 3091   format %{ %}
 3092   interface(CONST_INTER);
 3093 %}
 3094 
 3095 operand immP31()  // Pointer constant without relocation whose bits 31 and above are all zero
 3096 %{
 3097   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 3098             && (n->get_ptr() >> 31) == 0);
 3099   match(ConP);
 3100
 3101   op_cost(5);
 3102   format %{ %}
 3103   interface(CONST_INTER);
 3104 %}
 3105 
 3106 
 3107 // Long Immediate: any ConL constant, no restriction on the value
 3108 operand immL()
 3109 %{
 3110   match(ConL);
 3111
 3112   op_cost(20);
 3113   format %{ %}
 3114   interface(CONST_INTER);
 3115 %}
 3116 
 3117 // Long Immediate 8-bit: long constant in -0x80..0x7F inclusive
 3118 operand immL8()
 3119 %{
 3120   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 3121   match(ConL);
 3122
 3123   op_cost(5);
 3124   format %{ %}
 3125   interface(CONST_INTER);
 3126 %}
 3127 
 3128 // Long Immediate 32-bit unsigned: value survives a round trip through unsigned int
 3129 operand immUL32()
 3130 %{
 3131   predicate(n->get_long() == (unsigned int) (n->get_long()));
 3132   match(ConL);
 3133
 3134   op_cost(10);
 3135   format %{ %}
 3136   interface(CONST_INTER);
 3137 %}
 3138 
 3139 // Long Immediate 32-bit signed: value survives a round trip through int
 3140 operand immL32()
 3141 %{
 3142   predicate(n->get_long() == (int) (n->get_long()));
 3143   match(ConL);
 3144
 3145   op_cost(15);
 3146   format %{ %}
 3147   interface(CONST_INTER);
 3148 %}
 3149 
 3150 operand immL_Pow2()  // Long constant that, viewed as julong, is a power of 2
 3151 %{
 3152   predicate(is_power_of_2((julong)n->get_long()));
 3153   match(ConL);
 3154
 3155   op_cost(15);
 3156   format %{ %}
 3157   interface(CONST_INTER);
 3158 %}
 3159 
 3160 operand immL_NotPow2()  // Long constant whose bitwise complement, viewed as julong, is a power of 2
 3161 %{
 3162   predicate(is_power_of_2((julong)~n->get_long()));
 3163   match(ConL);
 3164
 3165   op_cost(15);
 3166   format %{ %}
 3167   interface(CONST_INTER);
 3168 %}
 3169 
 3170 // Long Immediate zero: long constant equal to 0
 3171 operand immL0()
 3172 %{
 3173   predicate(n->get_long() == 0L);
 3174   match(ConL);
 3175
 3176   op_cost(10);
 3177   format %{ %}
 3178   interface(CONST_INTER);
 3179 %}
 3180 
 3181 // Constant for increment: long constant equal to 1
 3182 operand immL1()
 3183 %{
 3184   predicate(n->get_long() == 1);
 3185   match(ConL);
 3186
 3187   format %{ %}  // no op_cost is given for this operand
 3188   interface(CONST_INTER);
 3189 %}
 3190 
 3191 // Constant for decrement: long constant equal to -1
 3192 operand immL_M1()
 3193 %{
 3194   predicate(n->get_long() == -1);
 3195   match(ConL);
 3196
 3197   format %{ %}  // no op_cost is given for this operand
 3198   interface(CONST_INTER);
 3199 %}
 3200 
 3201 // Long Immediate: the value 10 (long constant equal to 10)
 3202 operand immL10()
 3203 %{
 3204   predicate(n->get_long() == 10);
 3205   match(ConL);
 3206
 3207   format %{ %}  // no op_cost is given for this operand
 3208   interface(CONST_INTER);
 3209 %}
 3210 
 3211 // Long immediate from 0 to 127 inclusive.
 3212 // Used for a shorter form of long mul by 10.
 3213 operand immL_127()
 3214 %{
 3215   predicate(0 <= n->get_long() && n->get_long() < 0x80);
 3216   match(ConL);
 3217
 3218   op_cost(10);
 3219   format %{ %}
 3220   interface(CONST_INTER);
 3221 %}
 3222 
 3223 // Long Immediate: low 32-bit mask (long constant equal to 0xFFFFFFFF)
 3224 operand immL_32bits()
 3225 %{
 3226   predicate(n->get_long() == 0xFFFFFFFFL);
 3227   match(ConL);
 3228   op_cost(20);
 3229
 3230   format %{ %}
 3231   interface(CONST_INTER);
 3232 %}
 3233 
 3234 // Int Immediate: 2^n-1, positive (value > 0 and value + 1, as juint, is a power of 2)
 3235 operand immI_Pow2M1()
 3236 %{
 3237   predicate((n->get_int() > 0)
 3238             && is_power_of_2((juint)n->get_int() + 1));
 3239   match(ConI);
 3240
 3241   op_cost(20);
 3242   format %{ %}
 3243   interface(CONST_INTER);
 3244 %}
 3245 
 3246 // Float Immediate zero: float constant for which jint_cast of the value is 0
 3247 operand immF0()
 3248 %{
 3249   predicate(jint_cast(n->getf()) == 0);
 3250   match(ConF);
 3251
 3252   op_cost(5);
 3253   format %{ %}
 3254   interface(CONST_INTER);
 3255 %}
 3256 
 3257 // Float Immediate: any ConF constant, no restriction on the value
 3258 operand immF()
 3259 %{
 3260   match(ConF);
 3261
 3262   op_cost(15);
 3263   format %{ %}
 3264   interface(CONST_INTER);
 3265 %}
 3266 
 3267 // Double Immediate zero: double constant for which jlong_cast of the value is 0
 3268 operand immD0()
 3269 %{
 3270   predicate(jlong_cast(n->getd()) == 0);
 3271   match(ConD);
 3272
 3273   op_cost(5);
 3274   format %{ %}
 3275   interface(CONST_INTER);
 3276 %}
 3277 
 3278 // Double Immediate: any ConD constant, no restriction on the value
 3279 operand immD()
 3280 %{
 3281   match(ConD);
 3282
 3283   op_cost(15);
 3284   format %{ %}
 3285   interface(CONST_INTER);
 3286 %}
 3287 
 3288 // Immediates for special shifts (sign extend)
 3289
 3290 // Int constant equal to 16
 3291 operand immI_16()
 3292 %{
 3293   predicate(n->get_int() == 16);
 3294   match(ConI);
 3295
 3296   format %{ %}  // no op_cost is given for this operand
 3297   interface(CONST_INTER);
 3298 %}
 3299 
 3300 operand immI_24()  // Int constant equal to 24
 3301 %{
 3302   predicate(n->get_int() == 24);
 3303   match(ConI);
 3304
 3305   format %{ %}
 3306   interface(CONST_INTER);
 3307 %}
 3308 
 3309 // Constant for byte-wide masking: int constant equal to 255
 3310 operand immI_255()
 3311 %{
 3312   predicate(n->get_int() == 255);
 3313   match(ConI);
 3314
 3315   format %{ %}
 3316   interface(CONST_INTER);
 3317 %}
 3318 
 3319 // Constant for short-wide masking: int constant equal to 65535
 3320 operand immI_65535()
 3321 %{
 3322   predicate(n->get_int() == 65535);
 3323   match(ConI);
 3324
 3325   format %{ %}
 3326   interface(CONST_INTER);
 3327 %}
 3328 
 3329 // Constant for byte-wide masking: long constant equal to 255
 3330 operand immL_255()
 3331 %{
 3332   predicate(n->get_long() == 255);
 3333   match(ConL);
 3334
 3335   format %{ %}
 3336   interface(CONST_INTER);
 3337 %}
 3338 
 3339 // Constant for short-wide masking: long constant equal to 65535
 3340 operand immL_65535()
 3341 %{
 3342   predicate(n->get_long() == 65535);
 3343   match(ConL);
 3344
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 operand kReg()  // RegVectMask operand allocated in register class vectmask_reg
 3350 %{
 3351   constraint(ALLOC_IN_RC(vectmask_reg));
 3352   match(RegVectMask);
 3353   format %{%}
 3354   interface(REG_INTER);
 3355 %}
 3356 
 3357 operand kReg_K1()  // RegVectMask operand restricted to register class vectmask_reg_K1
 3358 %{
 3359   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3360   match(RegVectMask);
 3361   format %{%}
 3362   interface(REG_INTER);
 3363 %}
 3364 
 3365 operand kReg_K2()  // RegVectMask operand restricted to register class vectmask_reg_K2
 3366 %{
 3367   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3368   match(RegVectMask);
 3369   format %{%}
 3370   interface(REG_INTER);
 3371 %}
 3372 
 3373 // RegVectMask operand restricted to register class vectmask_reg_K3
 3374 operand kReg_K3()
 3375 %{
 3376   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3377   match(RegVectMask);
 3378   format %{%}
 3379   interface(REG_INTER);
 3380 %}
 3381 
 3382 operand kReg_K4()  // RegVectMask operand restricted to register class vectmask_reg_K4
 3383 %{
 3384   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3385   match(RegVectMask);
 3386   format %{%}
 3387   interface(REG_INTER);
 3388 %}
 3389 
 3390 operand kReg_K5()  // RegVectMask operand restricted to register class vectmask_reg_K5
 3391 %{
 3392   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3393   match(RegVectMask);
 3394   format %{%}
 3395   interface(REG_INTER);
 3396 %}
 3397 
 3398 operand kReg_K6()  // RegVectMask operand restricted to register class vectmask_reg_K6
 3399 %{
 3400   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3401   match(RegVectMask);
 3402   format %{%}
 3403   interface(REG_INTER);
 3404 %}
 3405 
 3406 // RegVectMask operand restricted to register class vectmask_reg_K7
 3407 operand kReg_K7()
 3408 %{
 3409   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3410   match(RegVectMask);
 3411   format %{%}
 3412   interface(REG_INTER);
 3413 %}
 3414 
 3415 // Register Operands
 3416 // Integer Register: RegI allocated in class int_reg
 3417 operand rRegI()
 3418 %{
 3419   constraint(ALLOC_IN_RC(int_reg));
 3420   match(RegI);
 3421
 3422   match(rax_RegI);  // the fixed-register int operands are accepted as well
 3423   match(rbx_RegI);
 3424   match(rcx_RegI);
 3425   match(rdx_RegI);
 3426   match(rdi_RegI);
 3427
 3428   format %{ %}
 3429   interface(REG_INTER);
 3430 %}
 3431 
 3432 // Special Registers: int operand restricted to register class int_rax_reg
 3433 operand rax_RegI()
 3434 %{
 3435   constraint(ALLOC_IN_RC(int_rax_reg));
 3436   match(RegI);
 3437   match(rRegI);
 3438
 3439   format %{ "RAX" %}
 3440   interface(REG_INTER);
 3441 %}
 3442 
 3443 // Special Registers: int operand restricted to register class int_rbx_reg
 3444 operand rbx_RegI()
 3445 %{
 3446   constraint(ALLOC_IN_RC(int_rbx_reg));
 3447   match(RegI);
 3448   match(rRegI);
 3449
 3450   format %{ "RBX" %}
 3451   interface(REG_INTER);
 3452 %}
 3453 
 3454 operand rcx_RegI()  // Int operand restricted to register class int_rcx_reg
 3455 %{
 3456   constraint(ALLOC_IN_RC(int_rcx_reg));
 3457   match(RegI);
 3458   match(rRegI);
 3459
 3460   format %{ "RCX" %}
 3461   interface(REG_INTER);
 3462 %}
 3463 
 3464 operand rdx_RegI()  // Int operand restricted to register class int_rdx_reg
 3465 %{
 3466   constraint(ALLOC_IN_RC(int_rdx_reg));
 3467   match(RegI);
 3468   match(rRegI);
 3469
 3470   format %{ "RDX" %}
 3471   interface(REG_INTER);
 3472 %}
 3473 
 3474 operand rdi_RegI()  // Int operand restricted to register class int_rdi_reg
 3475 %{
 3476   constraint(ALLOC_IN_RC(int_rdi_reg));
 3477   match(RegI);
 3478   match(rRegI);
 3479
 3480   format %{ "RDI" %}
 3481   interface(REG_INTER);
 3482 %}
 3483 
 3484 operand no_rax_rdx_RegI()  // Int operand allocated in class int_no_rax_rdx_reg
 3485 %{
 3486   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 3487   match(RegI);
 3488   match(rbx_RegI);  // rax_RegI and rdx_RegI are not listed as matches
 3489   match(rcx_RegI);
 3490   match(rdi_RegI);
 3491
 3492   format %{ %}
 3493   interface(REG_INTER);
 3494 %}
 3495 
 3496 operand no_rbp_r13_RegI()  // Int operand allocated in class int_no_rbp_r13_reg
 3497 %{
 3498   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 3499   match(RegI);
 3500   match(rRegI);
 3501   match(rax_RegI);
 3502   match(rbx_RegI);
 3503   match(rcx_RegI);
 3504   match(rdx_RegI);
 3505   match(rdi_RegI);
 3506
 3507   format %{ %}
 3508   interface(REG_INTER);
 3509 %}
 3510 
 3511 // Pointer Register: RegP allocated in class any_reg
 3512 operand any_RegP()
 3513 %{
 3514   constraint(ALLOC_IN_RC(any_reg));
 3515   match(RegP);
 3516   match(rax_RegP);
 3517   match(rbx_RegP);
 3518   match(rdi_RegP);
 3519   match(rsi_RegP);
 3520   match(rbp_RegP);
 3521   match(r15_RegP);
 3522   match(rRegP);
 3523
 3524   format %{ %}
 3525   interface(REG_INTER);
 3526 %}
 3527 
 3528 operand rRegP()  // Pointer operand allocated in class ptr_reg
 3529 %{
 3530   constraint(ALLOC_IN_RC(ptr_reg));
 3531   match(RegP);
 3532   match(rax_RegP);
 3533   match(rbx_RegP);
 3534   match(rdi_RegP);
 3535   match(rsi_RegP);
 3536   match(rbp_RegP);  // See Q&A below about
 3537   match(r15_RegP);  // r15_RegP and rbp_RegP.
 3538
 3539   format %{ %}
 3540   interface(REG_INTER);
 3541 %}
 3542 
 3543 operand rRegN() %{  // RegN operand; it is allocated in the same class (int_reg) as rRegI
 3544   constraint(ALLOC_IN_RC(int_reg));
 3545   match(RegN);
 3546
 3547   format %{ %}
 3548   interface(REG_INTER);
 3549 %}
 3550 
 3551 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 3552 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 3553 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 3554 // The output of an instruction is controlled by the allocator, which respects
 3555 // register class masks, not match rules.  Unless an instruction mentions
 3556 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 3557 // by the allocator as an input.
 3558 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 3559 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 3560 // result, RBP is not included in the output of the instruction either.
 3561 
 3562 operand no_rax_RegP()  // Pointer operand allocated in class ptr_no_rax_reg
 3563 %{
 3564   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
 3565   match(RegP);
 3566   match(rbx_RegP);  // rax_RegP is not listed as a match
 3567   match(rsi_RegP);
 3568   match(rdi_RegP);
 3569
 3570   format %{ %}
 3571   interface(REG_INTER);
 3572 %}
 3573 
 3574 // This operand is not allowed to use RBP even if
 3575 // RBP is not used to hold the frame pointer.
 3576 operand no_rbp_RegP()
 3577 %{
 3578   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 3579   match(RegP);
 3580   match(rbx_RegP);  // rbp_RegP is not listed as a match
 3581   match(rsi_RegP);
 3582   match(rdi_RegP);
 3583
 3584   format %{ %}
 3585   interface(REG_INTER);
 3586 %}
 3587 
 3588 operand no_rax_rbx_RegP()  // Pointer operand allocated in class ptr_no_rax_rbx_reg
 3589 %{
 3590   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
 3591   match(RegP);
 3592   match(rsi_RegP);  // rax_RegP and rbx_RegP are not listed as matches
 3593   match(rdi_RegP);
 3594
 3595   format %{ %}
 3596   interface(REG_INTER);
 3597 %}
 3598 
 3599 // Special Registers
 3600 // Return a pointer value: pointer operand restricted to class ptr_rax_reg
 3601 operand rax_RegP()
 3602 %{
 3603   constraint(ALLOC_IN_RC(ptr_rax_reg));
 3604   match(RegP);
 3605   match(rRegP);
 3606
 3607   format %{ %}
 3608   interface(REG_INTER);
 3609 %}
 3610 
 3611 // Special Registers
 3612 // Return a compressed pointer value: RegN operand restricted to class int_rax_reg
 3613 operand rax_RegN()
 3614 %{
 3615   constraint(ALLOC_IN_RC(int_rax_reg));
 3616   match(RegN);
 3617   match(rRegN);
 3618
 3619   format %{ %}
 3620   interface(REG_INTER);
 3621 %}
 3622 
 3623 // Used in AtomicAdd: pointer operand restricted to class ptr_rbx_reg
 3624 operand rbx_RegP()
 3625 %{
 3626   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 3627   match(RegP);
 3628   match(rRegP);
 3629
 3630   format %{ %}
 3631   interface(REG_INTER);
 3632 %}
 3633 
 3634 operand rsi_RegP()  // Pointer operand restricted to class ptr_rsi_reg
 3635 %{
 3636   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 3637   match(RegP);
 3638   match(rRegP);
 3639
 3640   format %{ %}
 3641   interface(REG_INTER);
 3642 %}
 3643 
 3644 operand rbp_RegP()  // Pointer operand restricted to class ptr_rbp_reg
 3645 %{
 3646   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 3647   match(RegP);
 3648   match(rRegP);
 3649
 3650   format %{ %}
 3651   interface(REG_INTER);
 3652 %}
 3653 
 3654 // Used in rep stosq: pointer operand restricted to class ptr_rdi_reg
 3655 operand rdi_RegP()
 3656 %{
 3657   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 3658   match(RegP);
 3659   match(rRegP);
 3660
 3661   format %{ %}
 3662   interface(REG_INTER);
 3663 %}
 3664 
 3665 operand r15_RegP()  // Pointer operand restricted to class ptr_r15_reg
 3666 %{
 3667   constraint(ALLOC_IN_RC(ptr_r15_reg));
 3668   match(RegP);
 3669   match(rRegP);
 3670
 3671   format %{ %}
 3672   interface(REG_INTER);
 3673 %}
 3674 
 3675 operand rRegL()  // Long operand allocated in class long_reg
 3676 %{
 3677   constraint(ALLOC_IN_RC(long_reg));
 3678   match(RegL);
 3679   match(rax_RegL);
 3680   match(rdx_RegL);
 3681
 3682   format %{ %}
 3683   interface(REG_INTER);
 3684 %}
 3685 
 3686 // Special Registers: long operand confined to register class long_no_rax_rdx_reg
 3687 operand no_rax_rdx_RegL()
 3688 %{
 3689   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 3690   match(RegL);
 3691   match(rRegL);
 3692
 3693   format %{ %}
 3694   interface(REG_INTER);
 3695 %}
 3696 
 3697 operand rax_RegL()
 3698 %{
 3699   constraint(ALLOC_IN_RC(long_rax_reg));  // long operand confined to register class long_rax_reg
 3700   match(RegL);
 3701   match(rRegL);
 3702
      // Unlike the sibling long operands, this one has a non-empty format string.
 3703   format %{ "RAX" %}
 3704   interface(REG_INTER);
 3705 %}
 3706 
 3707 operand rcx_RegL()
 3708 %{
 3709   constraint(ALLOC_IN_RC(long_rcx_reg));  // long operand confined to register class long_rcx_reg
 3710   match(RegL);
 3711   match(rRegL);
 3712
 3713   format %{ %}
 3714   interface(REG_INTER);
 3715 %}
 3716 
 3717 operand rdx_RegL()
 3718 %{
 3719   constraint(ALLOC_IN_RC(long_rdx_reg));  // long operand confined to register class long_rdx_reg
 3720   match(RegL);
 3721   match(rRegL);
 3722
 3723   format %{ %}
 3724   interface(REG_INTER);
 3725 %}
 3726 
 3727 operand no_rbp_r13_RegL()
 3728 %{
 3729   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));  // long operand confined to register class long_no_rbp_r13_reg
 3730   match(RegL);
 3731   match(rRegL);
 3732   match(rax_RegL);  // the rax/rcx/rdx long operands are accepted as well
 3733   match(rcx_RegL);
 3734   match(rdx_RegL);
 3735
 3736   format %{ %}
 3737   interface(REG_INTER);
 3738 %}
 3739 
 3740 // Flags register, used as output of compare instructions
 3741 operand rFlagsReg()
 3742 %{
 3743   constraint(ALLOC_IN_RC(int_flags));  // same register class as the other rFlagsReg* operands
 3744   match(RegFlags);
 3745
 3746   format %{ "RFLAGS" %}
 3747   interface(REG_INTER);
 3748 %}
 3749 
 3750 // Flags register, used as output of FLOATING POINT compare instructions
      // NOTE(review): the "U" suffix suggests unsigned compares also use it — confirm.
 3751 operand rFlagsRegU()
 3752 %{
 3753   constraint(ALLOC_IN_RC(int_flags));  // same register class as rFlagsReg
 3754   match(RegFlags);
 3755
 3756   format %{ "RFLAGS_U" %}
 3757   interface(REG_INTER);
 3758 %}
 3759 
 3760 operand rFlagsRegUCF() %{
 3761   constraint(ALLOC_IN_RC(int_flags));  // same register class as rFlagsReg / rFlagsRegU
 3762   match(RegFlags);
      // The predicate is the constant false.
      // NOTE(review): presumably this keeps the matcher from selecting the
      // operand on its own, leaving it to instructions that name it — confirm.
 3763   predicate(false);
 3764
 3765   format %{ "RFLAGS_U_CF" %}
 3766   interface(REG_INTER);
 3767 %}
 3768 
 3769 // Float register operand allocated from register class float_reg
 3770 operand regF() %{
 3771    constraint(ALLOC_IN_RC(float_reg));
 3772    match(RegF);
 3773
 3774    format %{ %}
 3775    interface(REG_INTER);
 3776 %}
 3777 
 3778 // Float register operand allocated from register class float_reg_legacy
 3779 operand legRegF() %{
 3780    constraint(ALLOC_IN_RC(float_reg_legacy));
 3781    match(RegF);
 3782
 3783    format %{ %}
 3784    interface(REG_INTER);
 3785 %}
 3786 
 3787 // Float register operand allocated from register class float_reg_vl
 3788 operand vlRegF() %{
 3789    constraint(ALLOC_IN_RC(float_reg_vl));
 3790    match(RegF);
 3791
 3792    format %{ %}
 3793    interface(REG_INTER);
 3794 %}
 3795 
 3796 // Double register operand allocated from register class double_reg
 3797 operand regD() %{
 3798    constraint(ALLOC_IN_RC(double_reg));
 3799    match(RegD);
 3800
 3801    format %{ %}
 3802    interface(REG_INTER);
 3803 %}
 3804 
 3805 // Double register operand allocated from register class double_reg_legacy
 3806 operand legRegD() %{
 3807    constraint(ALLOC_IN_RC(double_reg_legacy));
 3808    match(RegD);
 3809
 3810    format %{ %}
 3811    interface(REG_INTER);
 3812 %}
 3813 
 3814 // Double register operand allocated from register class double_reg_vl
 3815 operand vlRegD() %{
 3816    constraint(ALLOC_IN_RC(double_reg_vl));
 3817    match(RegD);
 3818
 3819    format %{ %}
 3820    interface(REG_INTER);
 3821 %}
 3822 
 3823 //----------Memory Operands----------------------------------------------------
 3824 // Direct Memory Operand
 3825 // operand direct(immP addr)
 3826 // %{
 3827 //   match(addr);
 3828 
 3829 //   format %{ "[$addr]" %}
 3830 //   interface(MEMORY_INTER) %{
 3831 //     base(0xFFFFFFFF);
 3832 //     index(0x4);
 3833 //     scale(0x0);
 3834 //     disp($addr);
 3835 //   %}
 3836 // %}
 3837 
 3838 // Indirect Memory Operand: the address is the pointer register itself
 3839 operand indirect(any_RegP reg)
 3840 %{
 3841   constraint(ALLOC_IN_RC(ptr_reg));
 3842   match(reg);
 3843
 3844   format %{ "[$reg]" %}
 3845   interface(MEMORY_INTER) %{
 3846     base($reg);
 3847     index(0x4);  // no index
 3848     scale(0x0);  // no scale
 3849     disp(0x0);   // zero displacement
 3850   %}
 3851 %}
 3852 
 3853 // Indirect Memory Plus Short Offset Operand: base register plus an immL8 offset
 3854 operand indOffset8(any_RegP reg, immL8 off)
 3855 %{
 3856   constraint(ALLOC_IN_RC(ptr_reg));
 3857   match(AddP reg off);
 3858
 3859   format %{ "[$reg + $off (8-bit)]" %}
 3860   interface(MEMORY_INTER) %{
 3861     base($reg);
 3862     index(0x4);  // no index
 3863     scale(0x0);  // no scale
 3864     disp($off);
 3865   %}
 3866 %}
 3867 
 3868 // Indirect Memory Plus Long Offset Operand: base register plus an immL32 offset
 3869 operand indOffset32(any_RegP reg, immL32 off)
 3870 %{
 3871   constraint(ALLOC_IN_RC(ptr_reg));
 3872   match(AddP reg off);
 3873
 3874   format %{ "[$reg + $off (32-bit)]" %}
 3875   interface(MEMORY_INTER) %{
 3876     base($reg);
 3877     index(0x4);  // no index
 3878     scale(0x0);  // no scale
 3879     disp($off);
 3880   %}
 3881 %}
 3882 
 3883 // Indirect Memory Plus Index Register Plus Offset Operand
 3884 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 3885 %{
 3886   constraint(ALLOC_IN_RC(ptr_reg));
 3887   match(AddP (AddP reg lreg) off);
 3888
 3889   op_cost(10);
 3890   format %{"[$reg + $off + $lreg]" %}
 3891   interface(MEMORY_INTER) %{
 3892     base($reg);
 3893     index($lreg);  // unscaled long index register
 3894     scale(0x0);    // no scale
 3895     disp($off);
 3896   %}
 3897 %}
 3898 
 3899 // Indirect Memory Plus Index Register Operand (no offset)
 3900 operand indIndex(any_RegP reg, rRegL lreg)
 3901 %{
 3902   constraint(ALLOC_IN_RC(ptr_reg));
 3903   match(AddP reg lreg);
 3904
 3905   op_cost(10);
 3906   format %{"[$reg + $lreg]" %}
 3907   interface(MEMORY_INTER) %{
 3908     base($reg);
 3909     index($lreg);
 3910     scale(0x0);  // no scale
 3911     disp(0x0);   // zero displacement
 3912   %}
 3913 %}
 3914 
 3915 // Indirect Memory Times Scale Plus Index Register
 3916 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 3917 %{
 3918   constraint(ALLOC_IN_RC(ptr_reg));
 3919   match(AddP reg (LShiftL lreg scale));
 3920
 3921   op_cost(10);
 3922   format %{"[$reg + $lreg << $scale]" %}
 3923   interface(MEMORY_INTER) %{
 3924     base($reg);
 3925     index($lreg);
 3926     scale($scale);  // the immI2 shift amount of the matched LShiftL
 3927     disp(0x0);      // zero displacement
 3928   %}
 3929 %}
 3930 
 3931 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 3932 %{
      // Indirect Memory Times Scale Plus Positive Index Register: the index is
      // an int register converted to long (ConvI2L) and shifted by scale.
 3933   constraint(ALLOC_IN_RC(ptr_reg));
      // Applies only when the long type found at n->in(3)->in(1) has _lo >= 0.
      // Presumably in(3) is the LShiftL and its in(1) the ConvI2L of the match
      // rule below — confirm against the AddP input layout.
 3934   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3935   match(AddP reg (LShiftL (ConvI2L idx) scale));
 3936
 3937   op_cost(10);
 3938   format %{"[$reg + pos $idx << $scale]" %}
 3939   interface(MEMORY_INTER) %{
 3940     base($reg);
 3941     index($idx);
 3942     scale($scale);
 3943     disp(0x0);  // zero displacement
 3944   %}
 3945 %}
 3946 
 3947 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 3948 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 3949 %{
 3950   constraint(ALLOC_IN_RC(ptr_reg));
 3951   match(AddP (AddP reg (LShiftL lreg scale)) off);
 3952
 3953   op_cost(10);
 3954   format %{"[$reg + $off + $lreg << $scale]" %}
 3955   interface(MEMORY_INTER) %{
 3956     base($reg);
 3957     index($lreg);
 3958     scale($scale);  // the immI2 shift amount of the matched LShiftL
 3959     disp($off);
 3960   %}
 3961 %}
 3962 
 3963 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 3964 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 3965 %{
 3966   constraint(ALLOC_IN_RC(ptr_reg));
      // Applies only when the long type found at n->in(2)->in(3) has _lo >= 0.
      // Presumably in(2) is the inner AddP and its in(3) the ConvI2L of the
      // match rule below — confirm against the AddP input layout.
 3967   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 3968   match(AddP (AddP reg (ConvI2L idx)) off);
 3969
 3970   op_cost(10);
 3971   format %{"[$reg + $off + $idx]" %}
 3972   interface(MEMORY_INTER) %{
 3973     base($reg);
 3974     index($idx);
 3975     scale(0x0);  // no scale
 3976     disp($off);
 3977   %}
 3978 %}
 3979 
 3980 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3981 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3982 %{
 3983   constraint(ALLOC_IN_RC(ptr_reg));
      // Applies only when the long type found at n->in(2)->in(3)->in(1) has
      // _lo >= 0. Presumably that path is inner AddP -> LShiftL -> ConvI2L of
      // the match rule below — confirm against the AddP input layout.
 3984   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3985   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3986
 3987   op_cost(10);
 3988   format %{"[$reg + $off + $idx << $scale]" %}
 3989   interface(MEMORY_INTER) %{
 3990     base($reg);
 3991     index($idx);
 3992     scale($scale);
 3993     disp($off);
 3994   %}
 3995 %}
 3996 
 3997 // Indirect Narrow Oop Plus Offset Operand
 3998 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
 3999 // so we can't free r12 even with CompressedOops::base() == NULL.
 4000 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
      // Applies only with compressed oops enabled and a shift equal to times_8,
      // which corresponds to the fixed scale(0x3) used below.
 4001   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4002   constraint(ALLOC_IN_RC(ptr_reg));
 4003   match(AddP (DecodeN reg) off);
 4004
 4005   op_cost(10);
 4006   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4007   interface(MEMORY_INTER) %{
 4008     base(0xc); // R12
 4009     index($reg);  // the narrow oop register acts as the index
 4010     scale(0x3);   // shift by 3, as shown in the format string
 4011     disp($off);
 4012   %}
 4013 %}
 4014 
 4015 // Indirect Memory Operand, narrow oop base (only when CompressedOops::shift() == 0)
 4016 operand indirectNarrow(rRegN reg)
 4017 %{
 4018   predicate(CompressedOops::shift() == 0);
 4019   constraint(ALLOC_IN_RC(ptr_reg));
 4020   match(DecodeN reg);
 4021
 4022   format %{ "[$reg]" %}
 4023   interface(MEMORY_INTER) %{
 4024     base($reg);  // the narrow register is used directly as the base
 4025     index(0x4);  // no index
 4026     scale(0x0);  // no scale
 4027     disp(0x0);   // zero displacement
 4028   %}
 4029 %}
 4030 
 4031 // Indirect Memory Plus Short Offset Operand, narrow oop base (only when CompressedOops::shift() == 0)
 4032 operand indOffset8Narrow(rRegN reg, immL8 off)
 4033 %{
 4034   predicate(CompressedOops::shift() == 0);
 4035   constraint(ALLOC_IN_RC(ptr_reg));
 4036   match(AddP (DecodeN reg) off);
 4037
 4038   format %{ "[$reg + $off (8-bit)]" %}
 4039   interface(MEMORY_INTER) %{
 4040     base($reg);
 4041     index(0x4);  // no index
 4042     scale(0x0);  // no scale
 4043     disp($off);
 4044   %}
 4045 %}
 4046 
 4047 // Indirect Memory Plus Long Offset Operand, narrow oop base (only when CompressedOops::shift() == 0)
 4048 operand indOffset32Narrow(rRegN reg, immL32 off)
 4049 %{
 4050   predicate(CompressedOops::shift() == 0);
 4051   constraint(ALLOC_IN_RC(ptr_reg));
 4052   match(AddP (DecodeN reg) off);
 4053
 4054   format %{ "[$reg + $off (32-bit)]" %}
 4055   interface(MEMORY_INTER) %{
 4056     base($reg);
 4057     index(0x4);  // no index
 4058     scale(0x0);  // no scale
 4059     disp($off);
 4060   %}
 4061 %}
 4062 
 4063 // Indirect Memory Plus Index Register Plus Offset Operand, narrow oop base
 4064 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 4065 %{
 4066   predicate(CompressedOops::shift() == 0);  // narrow register is usable as the base only with a zero shift
 4067   constraint(ALLOC_IN_RC(ptr_reg));
 4068   match(AddP (AddP (DecodeN reg) lreg) off);
 4069
 4070   op_cost(10);
 4071   format %{"[$reg + $off + $lreg]" %}
 4072   interface(MEMORY_INTER) %{
 4073     base($reg);
 4074     index($lreg);
 4075     scale(0x0);  // no scale
 4076     disp($off);
 4077   %}
 4078 %}
 4079 
 4080 // Indirect Memory Plus Index Register Operand (no offset), narrow oop base
 4081 operand indIndexNarrow(rRegN reg, rRegL lreg)
 4082 %{
 4083   predicate(CompressedOops::shift() == 0);  // narrow register is usable as the base only with a zero shift
 4084   constraint(ALLOC_IN_RC(ptr_reg));
 4085   match(AddP (DecodeN reg) lreg);
 4086
 4087   op_cost(10);
 4088   format %{"[$reg + $lreg]" %}
 4089   interface(MEMORY_INTER) %{
 4090     base($reg);
 4091     index($lreg);
 4092     scale(0x0);  // no scale
 4093     disp(0x0);   // zero displacement
 4094   %}
 4095 %}
 4096 
 4097 // Indirect Memory Times Scale Plus Index Register, narrow oop base
 4098 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 4099 %{
 4100   predicate(CompressedOops::shift() == 0);  // narrow register is usable as the base only with a zero shift
 4101   constraint(ALLOC_IN_RC(ptr_reg));
 4102   match(AddP (DecodeN reg) (LShiftL lreg scale));
 4103
 4104   op_cost(10);
 4105   format %{"[$reg + $lreg << $scale]" %}
 4106   interface(MEMORY_INTER) %{
 4107     base($reg);
 4108     index($lreg);
 4109     scale($scale);
 4110     disp(0x0);  // zero displacement
 4111   %}
 4112 %}
 4113 
 4114 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand, narrow oop base
 4115 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 4116 %{
 4117   predicate(CompressedOops::shift() == 0);  // narrow register is usable as the base only with a zero shift
 4118   constraint(ALLOC_IN_RC(ptr_reg));
 4119   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 4120
 4121   op_cost(10);
 4122   format %{"[$reg + $off + $lreg << $scale]" %}
 4123   interface(MEMORY_INTER) %{
 4124     base($reg);
 4125     index($lreg);
 4126     scale($scale);
 4127     disp($off);
 4128   %}
 4129 %}
 4130 
 4131 // Indirect Memory Plus Positive Index Register Plus Offset Operand, narrow oop base
 4132 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 4133 %{
 4134   constraint(ALLOC_IN_RC(ptr_reg));
      // Requires a zero compressed-oop shift and _lo >= 0 on the long type found
      // at n->in(2)->in(3); presumably that node is the ConvI2L of the match
      // rule below — confirm against the AddP input layout.
 4135   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 4136   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 4137
 4138   op_cost(10);
 4139   format %{"[$reg + $off + $idx]" %}
 4140   interface(MEMORY_INTER) %{
 4141     base($reg);
 4142     index($idx);
 4143     scale(0x0);  // no scale
 4144     disp($off);
 4145   %}
 4146 %}
 4147 
 4148 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand, narrow oop base
 4149 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 4150 %{
 4151   constraint(ALLOC_IN_RC(ptr_reg));
      // Requires a zero compressed-oop shift and _lo >= 0 on the long type found
      // at n->in(2)->in(3)->in(1); presumably that path is inner AddP -> LShiftL
      // -> ConvI2L of the match rule below — confirm against the AddP input layout.
 4152   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 4153   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 4154
 4155   op_cost(10);
 4156   format %{"[$reg + $off + $idx << $scale]" %}
 4157   interface(MEMORY_INTER) %{
 4158     base($reg);
 4159     index($idx);
 4160     scale($scale);
 4161     disp($off);
 4162   %}
 4163 %}
 4164 
 4165 //----------Special Memory Operands--------------------------------------------
 4166 // Stack Slot Operand - This operand is used for loading and storing temporary
 4167 //                      values on the stack where a match requires a value to
 4168 //                      flow through memory.
 4169 operand stackSlotP(sRegP reg)
 4170 %{
 4171   constraint(ALLOC_IN_RC(stack_slots));  // stack slot for an sRegP value
 4172   // No match rule because this operand is only generated in matching
 4173
 4174   format %{ "[$reg]" %}
 4175   interface(MEMORY_INTER) %{
 4176     base(0x4);   // RSP
 4177     index(0x4);  // No Index
 4178     scale(0x0);  // No Scale
 4179     disp($reg);  // Stack Offset
 4180   %}
 4181 %}
 4182 
 4183 operand stackSlotI(sRegI reg)
 4184 %{
 4185   constraint(ALLOC_IN_RC(stack_slots));  // stack slot for an sRegI value
 4186   // No match rule because this operand is only generated in matching
 4187
 4188   format %{ "[$reg]" %}
 4189   interface(MEMORY_INTER) %{
 4190     base(0x4);   // RSP
 4191     index(0x4);  // No Index
 4192     scale(0x0);  // No Scale
 4193     disp($reg);  // Stack Offset
 4194   %}
 4195 %}
 4196 
 4197 operand stackSlotF(sRegF reg)
 4198 %{
 4199   constraint(ALLOC_IN_RC(stack_slots));  // stack slot for an sRegF value
 4200   // No match rule because this operand is only generated in matching
 4201
 4202   format %{ "[$reg]" %}
 4203   interface(MEMORY_INTER) %{
 4204     base(0x4);   // RSP
 4205     index(0x4);  // No Index
 4206     scale(0x0);  // No Scale
 4207     disp($reg);  // Stack Offset
 4208   %}
 4209 %}
 4210 
 4211 operand stackSlotD(sRegD reg)
 4212 %{
 4213   constraint(ALLOC_IN_RC(stack_slots));  // stack slot for an sRegD value
 4214   // No match rule because this operand is only generated in matching
 4215
 4216   format %{ "[$reg]" %}
 4217   interface(MEMORY_INTER) %{
 4218     base(0x4);   // RSP
 4219     index(0x4);  // No Index
 4220     scale(0x0);  // No Scale
 4221     disp($reg);  // Stack Offset
 4222   %}
 4223 %}
 4224 operand stackSlotL(sRegL reg)
 4225 %{
 4226   constraint(ALLOC_IN_RC(stack_slots));  // stack slot for an sRegL value
 4227   // No match rule because this operand is only generated in matching
 4228
 4229   format %{ "[$reg]" %}
 4230   interface(MEMORY_INTER) %{
 4231     base(0x4);   // RSP
 4232     index(0x4);  // No Index
 4233     scale(0x0);  // No Scale
 4234     disp($reg);  // Stack Offset
 4235   %}
 4236 %}
 4237 
 4238 //----------Conditional Branch Operands----------------------------------------
 4239 // Comparison Op  - This is the operation of the comparison, and is limited to
 4240 //                  the following set of codes:
 4241 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4242 //
 4243 // Other attributes of the comparison, such as unsignedness, are specified
 4244 // by the comparison instruction that sets a condition code flags register.
 4245 // That result is represented by a flags operand whose subtype is appropriate
 4246 // to the unsignedness (etc.) of the comparison.
 4247 //
 4248 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4249 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4250 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4251 
 4252 // Comparison Code: matches a Bool; each entry pairs a numeric code with a mnemonic
 4253 operand cmpOp()
 4254 %{
 4255   match(Bool);
 4256
 4257   format %{ "" %}
 4258   interface(COND_INTER) %{
 4259     equal(0x4, "e");
 4260     not_equal(0x5, "ne");
 4261     less(0xC, "l");
 4262     greater_equal(0xD, "ge");
 4263     less_equal(0xE, "le");
 4264     greater(0xF, "g");
 4265     overflow(0x0, "o");
 4266     no_overflow(0x1, "no");
 4267   %}
 4268 %}
 4269 
 4270 // Comparison Code, unsigned compare.  Used by FP also, with
 4271 // C2 (unordered) turned into GT or LT already.  The other bits
 4272 // C0 and C3 are turned into Carry & Zero flags.
 4273 operand cmpOpU()
 4274 %{
 4275   match(Bool);
 4276
 4277   format %{ "" %}
 4278   interface(COND_INTER) %{
 4279     equal(0x4, "e");
 4280     not_equal(0x5, "ne");
 4281     less(0x2, "b");            // ordering tests use b/ae/be/a here, where cmpOp uses l/ge/le/g
 4282     greater_equal(0x3, "ae");
 4283     less_equal(0x6, "be");
 4284     greater(0x7, "a");
 4285     overflow(0x0, "o");
 4286     no_overflow(0x1, "no");
 4287   %}
 4288 %}
 4289 
 4290 
 4291 // Floating comparisons that don't require any fixup for the unordered case.
 4292 // If both inputs of the comparison are the same, ZF is always set so we
 4293 // don't need to use cmpOpUCF2 for eq/ne
 4294 operand cmpOpUCF() %{
 4295   match(Bool);
      // Accepts lt/ge/le/gt tests, plus any test whose compare has identical
      // inputs (n->in(1)->in(1) == n->in(1)->in(2)).
 4296   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4297             n->as_Bool()->_test._test == BoolTest::ge ||
 4298             n->as_Bool()->_test._test == BoolTest::le ||
 4299             n->as_Bool()->_test._test == BoolTest::gt ||
 4300             n->in(1)->in(1) == n->in(1)->in(2));
 4301   format %{ "" %}
 4302   interface(COND_INTER) %{
 4303     equal(0xb, "np");      // eq/ne use np/p here, unlike the e/ne of cmpOpU and cmpOpUCF2
 4304     not_equal(0xa, "p");
 4305     less(0x2, "b");
 4306     greater_equal(0x3, "ae");
 4307     less_equal(0x6, "be");
 4308     greater(0x7, "a");
 4309     overflow(0x0, "o");
 4310     no_overflow(0x1, "no");
 4311   %}
 4312 %}
 4313 
 4314 
 4315 // Floating comparisons that can be fixed up with extra conditional jumps
 4316 operand cmpOpUCF2() %{
 4317   match(Bool);
      // Accepts only ne/eq tests whose compare has two distinct inputs, i.e. the
      // cases not covered by the predicate of cmpOpUCF.
 4318   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 4319              n->as_Bool()->_test._test == BoolTest::eq) &&
 4320             n->in(1)->in(1) != n->in(1)->in(2));
 4321   format %{ "" %}
 4322   interface(COND_INTER) %{
 4323     equal(0x4, "e");
 4324     not_equal(0x5, "ne");
 4325     less(0x2, "b");
 4326     greater_equal(0x3, "ae");
 4327     less_equal(0x6, "be");
 4328     greater(0x7, "a");
 4329     overflow(0x0, "o");
 4330     no_overflow(0x1, "no");
 4331   %}
 4332 %}
 4333 
 4334 //----------OPERAND CLASSES----------------------------------------------------
 4335 // Operand Classes are groups of operands that are used to simplify
 4336 // instruction definitions by not requiring the AD writer to specify separate
 4337 // instructions for every form of operand when the instruction accepts
 4338 // multiple operand types with the same basic encoding and format.  The classic
 4339 // case of this is memory operands.
 4340 
 4341 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4342                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4343                indCompressedOopOffset,  // form with fixed base R12
 4344                indirectNarrow, indOffset8Narrow, indOffset32Narrow,  // forms predicated on CompressedOops::shift() == 0
 4345                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4346                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4347 
 4348 //----------PIPELINE-----------------------------------------------------------
 4349 // Rules which define the behavior of the target architecture's pipeline.
 4350 pipeline %{
 4351 
 4352 //----------ATTRIBUTES---------------------------------------------------------
 4353 attributes %{
 4354   variable_size_instructions;        // Variable size instructions
 4355   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4356   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 4357   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4358   instruction_fetch_units = 1;       // of 16 bytes
 4359
 4360   // List of nop instructions
 4361   nops( MachNop );
 4362 %}
 4363 
 4364 //----------RESOURCES----------------------------------------------------------
 4365 // Resources are the functional units available to the machine
 4366 
 4367 // Generic P2/P3 pipeline
 4368 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4369 // 3 instructions decoded per cycle.
 4370 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4371 // 3 ALU op, only ALU0 handles mul instructions.
      // NOTE(review): three memory resources (MS0, MS1, MS2) are declared below
      // although the text above says 2 load/store ops — confirm which is intended.
      // DECODE, MEM and ALU are each the union of their numbered units.
 4372 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4373            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 4374            BR, FPU,
 4375            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 4376 
 4377 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4378 // Pipeline Description specifies the stages in the machine's pipeline
 4379 
 4380 // Generic P2/P3 pipeline: six stages named S0 through S5
 4381 pipe_desc(S0, S1, S2, S3, S4, S5);
 4382 
 4383 //----------PIPELINE CLASSES---------------------------------------------------
 4384 // Pipeline Classes describe the stages in which input and output are
 4385 // referenced by the hardware pipeline.
 4386 
 4387 // Naming convention: ialu or fpu
 4388 // Then: _reg
 4389 // Then: _reg if there is a 2nd register
 4390 // Then: _long if it's a pair of instructions implementing a long
 4391 // Then: _fat if it requires the big decoder
 4392 //   Or: _mem if it requires the big decoder and a memory unit.
 4393 
 4394 // Integer ALU reg operation: dst is read in S3 and written in S4
 4395 pipe_class ialu_reg(rRegI dst)
 4396 %{
 4397     single_instruction;
 4398     dst    : S4(write);
 4399     dst    : S3(read);
 4400     DECODE : S0;        // any decoder
 4401     ALU    : S3;        // any alu
 4402 %}
 4403 
 4404 // Long ALU reg operation (counted as 2 instructions)
 4405 pipe_class ialu_reg_long(rRegL dst)
 4406 %{
 4407     instruction_count(2);
 4408     dst    : S4(write);
 4409     dst    : S3(read);
 4410     DECODE : S0(2);     // any 2 decoders
 4411     ALU    : S3(2);     // any 2 alus (ALU covers ALU0 | ALU1 | ALU2)
 4412 %}
 4413 
 4414 // Integer ALU reg operation using big decoder (D0 instead of any DECODE unit)
 4415 pipe_class ialu_reg_fat(rRegI dst)
 4416 %{
 4417     single_instruction;
 4418     dst    : S4(write);
 4419     dst    : S3(read);
 4420     D0     : S0;        // big decoder only
 4421     ALU    : S3;        // any alu
 4422 %}
 4423 
 4424 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4
 4425 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 4426 %{
 4427     single_instruction;
 4428     dst    : S4(write);
 4429     src    : S3(read);
 4430     DECODE : S0;        // any decoder
 4431     ALU    : S3;        // any alu
 4432 %}
 4433 
 4434 // Integer ALU reg-reg operation using big decoder
      // NOTE(review): src is declared as a memory operand although the class
      // name says reg_reg and no MEM resource is used — confirm this is intended.
 4435 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 4436 %{
 4437     single_instruction;
 4438     dst    : S4(write);
 4439     src    : S3(read);
 4440     D0     : S0;        // big decoder only
 4441     ALU    : S3;        // any alu
 4442 %}
 4443 
 4444 // Integer ALU reg-mem operation: mem is read in S3, dst is written in S5
 4445 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 4446 %{
 4447     single_instruction;
 4448     dst    : S5(write);
 4449     mem    : S3(read);
 4450     D0     : S0;        // big decoder only
 4451     ALU    : S4;        // any alu
 4452     MEM    : S3;        // any mem
 4453 %}
 4454 
 4455 // Integer mem operation (prefetch): uses a memory unit but no ALU
 4456 pipe_class ialu_mem(memory mem)
 4457 %{
 4458     single_instruction;
 4459     mem    : S3(read);
 4460     D0     : S0;        // big decoder only
 4461     MEM    : S3;        // any mem
 4462 %}
 4463 
 4464 // Integer Store to Memory: register source src, read in S5
 4465 pipe_class ialu_mem_reg(memory mem, rRegI src)
 4466 %{
 4467     single_instruction;
 4468     mem    : S3(read);
 4469     src    : S5(read);
 4470     D0     : S0;        // big decoder only
 4471     ALU    : S4;        // any alu
 4472     MEM    : S3;        // any mem
 4473 %}
 4474 
 4475 // // Long Store to Memory
 4476 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 4477 // %{
 4478 //     instruction_count(2);
 4479 //     mem    : S3(read);
 4480 //     src    : S5(read);
 4481 //     D0     : S0(2);          // big decoder only; twice
 4482 //     ALU    : S4(2);     // any 2 alus
 4483 //     MEM    : S3(2);  // Both mems
 4484 // %}
 4485 
 4486 // Integer Store to Memory with no register source (immediate, per the class name)
 4487 pipe_class ialu_mem_imm(memory mem)
 4488 %{
 4489     single_instruction;
 4490     mem    : S3(read);
 4491     D0     : S0;        // big decoder only
 4492     ALU    : S4;        // any alu
 4493     MEM    : S3;        // any mem
 4494 %}
 4495 
 4496 // Integer ALU0 reg-reg operation: needs the big decoder and ALU0 specifically
 4497 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 4498 %{
 4499     single_instruction;
 4500     dst    : S4(write);
 4501     src    : S3(read);
 4502     D0     : S0;        // Big decoder only
 4503     ALU0   : S3;        // only alu0
 4504 %}
 4505 
 4506 // Integer ALU0 reg-mem operation: mem is read in S3, dst is written in S5
 4507 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 4508 %{
 4509     single_instruction;
 4510     dst    : S5(write);
 4511     mem    : S3(read);
 4512     D0     : S0;        // big decoder only
 4513     ALU0   : S4;        // ALU0 only
 4514     MEM    : S3;        // any mem
 4515 %}
 4516 
 4517 // Integer ALU reg-reg operation writing the flags register cr
 4518 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 4519 %{
 4520     single_instruction;
 4521     cr     : S4(write);
 4522     src1   : S3(read);
 4523     src2   : S3(read);
 4524     DECODE : S0;        // any decoder
 4525     ALU    : S3;        // any alu
 4526 %}
 4527 
 4528 // Integer ALU reg-imm operation writing the flags register cr
 4529 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 4530 %{
 4531     single_instruction;
 4532     cr     : S4(write);
 4533     src1   : S3(read);
 4534     DECODE : S0;        // any decoder
 4535     ALU    : S3;        // any alu
 4536 %}
 4537 
 4538 // Integer ALU reg-mem operation writing the flags register cr
 4539 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 4540 %{
 4541     single_instruction;
 4542     cr     : S4(write);
 4543     src1   : S3(read);
 4544     src2   : S3(read);
 4545     D0     : S0;        // big decoder only
 4546     ALU    : S4;        // any alu
 4547     MEM    : S3;        // any mem
 4548 %}
 4549 
 4550 // Conditional move reg-reg (4-instruction sequence reading p, q and y)
 4551 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 4552 %{
 4553     instruction_count(4);
 4554     y      : S4(read);
 4555     q      : S3(read);
 4556     p      : S3(read);
 4557     DECODE : S0(4);     // any decoder, count 4
 4558 %}
 4559 
 4560 // Conditional move reg-reg: reads src and the flags cr in S3, writes dst in S4
 4561 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 4562 %{
 4563     single_instruction;
 4564     dst    : S4(write);
 4565     src    : S3(read);
 4566     cr     : S3(read);
 4567     DECODE : S0;        // any decoder
 4568 %}
 4569 
 4570 // Conditional move reg-mem (note the parameter order: cr first, then dst, src)
 4571 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 4572 %{
 4573     single_instruction;
 4574     dst    : S4(write);
 4575     src    : S3(read);
 4576     cr     : S3(read);
 4577     DECODE : S0;        // any decoder
 4578     MEM    : S3;        // any mem
 4579 %}
 4580 
 4581 // Conditional move reg-reg long
      // NOTE(review): declared single_instruction yet DECODE uses a count of 2 — confirm.
 4582 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 4583 %{
 4584     single_instruction;
 4585     dst    : S4(write);
 4586     src    : S3(read);
 4587     cr     : S3(read);
 4588     DECODE : S0(2);     // any 2 decoders
 4589 %}
 4590 
 4591 // XXX
 4592 // // Conditional move double reg-reg
 4593 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
 4594 // %{
 4595 //     single_instruction;
 4596 //     dst    : S4(write);
 4597 //     src    : S3(read);
 4598 //     cr     : S3(read);
 4599 //     DECODE : S0;     // any decoder
 4600 // %}
 4601 
 4602 // Float reg operation: single operand dst, which is only read (in S3)
 4603 pipe_class fpu_reg(regD dst)
 4604 %{
 4605     instruction_count(2);
 4606     dst    : S3(read);
 4607     DECODE : S0(2);     // any 2 decoders
 4608     FPU    : S3;
 4609 %}
 4610 
 4611 // Float reg-reg operation: src is read in S3, dst is written in S4
 4612 pipe_class fpu_reg_reg(regD dst, regD src)
 4613 %{
 4614     instruction_count(2);
 4615     dst    : S4(write);
 4616     src    : S3(read);
 4617     DECODE : S0(2);     // any 2 decoders
 4618     FPU    : S3;
 4619 %}
 4620 
 4621 // Float reg-reg-reg operation
 4622 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 4623 %{
 4624     instruction_count(3);
 4625     dst    : S4(write);
 4626     src1   : S3(read);
 4627     src2   : S3(read);
 4628     DECODE : S0(3);     // any 3 decoders
 4629     FPU    : S3(2);
 4630 %}
 4631 
 4632 // Float reg-reg-reg-reg operation
 4633 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 4634 %{
 4635     instruction_count(4);
 4636     dst    : S4(write);
 4637     src1   : S3(read);
 4638     src2   : S3(read);
 4639     src3   : S3(read);
 4640     DECODE : S0(4);     // any decoder, count 4
 4641     FPU    : S3(2);
 4642 %}
 4643 
 4644 // Float reg-mem-reg-reg operation
 4645 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 4646 %{
 4647     instruction_count(4);
 4648     dst    : S4(write);
 4649     src1   : S3(read);
 4650     src2   : S3(read);
 4651     src3   : S3(read);
 4652     DECODE : S1(3);     // any 3 decoders
 4653     D0     : S0;        // Big decoder only
 4654     FPU    : S3(2);
 4655     MEM    : S3;        // any mem
 4656 %}
 4657 
 4658 // Float reg-mem operation: mem is read in S3, dst is written in S5
 4659 pipe_class fpu_reg_mem(regD dst, memory mem)
 4660 %{
 4661     instruction_count(2);
 4662     dst    : S5(write);
 4663     mem    : S3(read);
 4664     D0     : S0;        // big decoder only
 4665     DECODE : S1;        // any decoder for FPU POP
 4666     FPU    : S4;
 4667     MEM    : S3;        // any mem
 4668 %}
 4669 
 4670 // Float reg-reg-mem operation
 4671 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 4672 %{
 4673     instruction_count(3);
 4674     dst    : S5(write);
 4675     src1   : S3(read);
 4676     mem    : S3(read);
 4677     D0     : S0;        // big decoder only
 4678     DECODE : S1(2);     // any decoder for FPU POP
 4679     FPU    : S4;
 4680     MEM    : S3;        // any mem
 4681 %}
 4682 
 4683 // Float mem-reg operation: mem is read in S3, src in S5; nothing is marked written
 4684 pipe_class fpu_mem_reg(memory mem, regD src)
 4685 %{
 4686     instruction_count(2);
 4687     src    : S5(read);
 4688     mem    : S3(read);
 4689     DECODE : S0;        // any decoder for FPU PUSH
 4690     D0     : S1;        // big decoder only
 4691     FPU    : S4;
 4692     MEM    : S3;        // any mem
 4693 %}
 4694 
 4695 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 4696 %{
 4697     instruction_count(3);  // float mem-reg-reg operation, counted as 3 instructions
 4698     src1   : S3(read);
 4699     src2   : S3(read);
 4700     mem    : S3(read);
 4701     DECODE : S0(2);     // any decoder for FPU PUSH
 4702     D0     : S1;        // big decoder only
 4703     FPU    : S4;
 4704     MEM    : S3;        // any mem
 4705 %}
 4706 
 4707 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 4708 %{
 4709     instruction_count(3);  // float mem-reg-mem operation, counted as 3 instructions
 4710     src1   : S3(read);
 4711     src2   : S3(read);
 4712     mem    : S4(read);
 4713     DECODE : S0;        // any decoder for FPU PUSH
 4714     D0     : S0(2);     // big decoder only
 4715     FPU    : S4;
 4716     MEM    : S3(2);     // any mem
 4717 %}
 4718 
 4719 pipe_class fpu_mem_mem(memory dst, memory src1)
 4720 %{
 4721     instruction_count(2);
 4722     src1   : S3(read);
 4723     dst    : S4(read);
 4724     D0     : S0(2);     // big decoder only
 4725     MEM    : S3(2);     // any mem
 4726 %}
 4727 
 4728 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 4729 %{
 4730     instruction_count(3);
 4731     src1   : S3(read);
 4732     src2   : S3(read);
 4733     dst    : S4(read);
 4734     D0     : S0(3);     // big decoder only
 4735     FPU    : S4;
 4736     MEM    : S3(3);     // any mem
 4737 %}
 4738 
 4739 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 4740 %{
 4741     instruction_count(3);
 4742     src1   : S4(read);
 4743     mem    : S4(read);
 4744     DECODE : S0;        // any decoder for FPU PUSH
 4745     D0     : S0(2);     // big decoder only
 4746     FPU    : S4;
 4747     MEM    : S3(2);     // any mem
 4748 %}
 4749 
 4750 // Float load constant
 4751 pipe_class fpu_reg_con(regD dst)
 4752 %{
 4753     instruction_count(2);
 4754     dst    : S5(write);
 4755     D0     : S0;        // big decoder only for the load
 4756     DECODE : S1;        // any decoder for FPU POP
 4757     FPU    : S4;
 4758     MEM    : S3;        // any mem
 4759 %}
 4760 
 4761 // Float load constant
 4762 pipe_class fpu_reg_reg_con(regD dst, regD src)
 4763 %{
 4764     instruction_count(3);
 4765     dst    : S5(write);
 4766     src    : S3(read);
 4767     D0     : S0;        // big decoder only for the load
 4768     DECODE : S1(2);     // any decoder for FPU POP
 4769     FPU    : S4;
 4770     MEM    : S3;        // any mem
 4771 %}
 4772 
 4773 // UnConditional branch
 4774 pipe_class pipe_jmp(label labl)
 4775 %{
 4776     single_instruction;
 4777     BR   : S3;
 4778 %}
 4779 
 4780 // Conditional branch
 4781 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 4782 %{
 4783     single_instruction;
 4784     cr    : S1(read);
 4785     BR    : S3;
 4786 %}
 4787 
 4788 // Allocation idiom
 4789 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 4790 %{
 4791     instruction_count(1); force_serialization;
 4792     fixed_latency(6);
 4793     heap_ptr : S3(read);
 4794     DECODE   : S0(3);
 4795     D0       : S2;
 4796     MEM      : S3;
 4797     ALU      : S3(2);
 4798     dst      : S5(write);
 4799     BR       : S5;
 4800 %}
 4801 
 4802 // Generic big/slow expanded idiom
 4803 pipe_class pipe_slow()
 4804 %{
 4805     instruction_count(10); multiple_bundles; force_serialization;
 4806     fixed_latency(100);
 4807     D0  : S0(2);
 4808     MEM : S3(2);
 4809 %}
 4810 
 4811 // The real do-nothing guy
 4812 pipe_class empty()
 4813 %{
 4814     instruction_count(0);
 4815 %}
 4816 
 4817 // Define the class for the Nop node
 4818 define
 4819 %{
 4820    MachNop = empty;
 4821 %}
 4822 
 4823 %}
 4824 
 4825 //----------INSTRUCTIONS-------------------------------------------------------
 4826 //
 4827 // match      -- States which machine-independent subtree may be replaced
 4828 //               by this instruction.
 4829 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4830 //               selection to identify a minimum cost tree of machine
 4831 //               instructions that matches a tree of machine-independent
 4832 //               instructions.
 4833 // format     -- A string providing the disassembly for this instruction.
 4834 //               The value of an instruction's operand may be inserted
 4835 //               by referring to it with a '$' prefix.
 4836 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4837 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 4839 //               indicate the type of machine instruction, while secondary
 4840 //               and tertiary are often used for prefix options or addressing
 4841 //               modes.
 4842 // ins_encode -- A list of encode classes with parameters. The encode class
 4843 //               name must have been defined in an 'enc_class' specification
 4844 //               in the encode section of the architecture description.
 4845 
 4846 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 4847 // Load Float
 4848 instruct MoveF2VL(vlRegF dst, regF src) %{
 4849   match(Set dst src);
 4850   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4851   ins_encode %{
 4852     ShouldNotReachHere();
 4853   %}
 4854   ins_pipe( fpu_reg_reg );
 4855 %}
 4856 
 4857 // Load Float
 4858 instruct MoveF2LEG(legRegF dst, regF src) %{
 4859   match(Set dst src);
 4860   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4861   ins_encode %{
 4862     ShouldNotReachHere();
 4863   %}
 4864   ins_pipe( fpu_reg_reg );
 4865 %}
 4866 
 4867 // Load Float
 4868 instruct MoveVL2F(regF dst, vlRegF src) %{
 4869   match(Set dst src);
 4870   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4871   ins_encode %{
 4872     ShouldNotReachHere();
 4873   %}
 4874   ins_pipe( fpu_reg_reg );
 4875 %}
 4876 
 4877 // Load Float
 4878 instruct MoveLEG2F(regF dst, legRegF src) %{
 4879   match(Set dst src);
 4880   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4881   ins_encode %{
 4882     ShouldNotReachHere();
 4883   %}
 4884   ins_pipe( fpu_reg_reg );
 4885 %}
 4886 
 4887 // Load Double
 4888 instruct MoveD2VL(vlRegD dst, regD src) %{
 4889   match(Set dst src);
 4890   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4891   ins_encode %{
 4892     ShouldNotReachHere();
 4893   %}
 4894   ins_pipe( fpu_reg_reg );
 4895 %}
 4896 
 4897 // Load Double
 4898 instruct MoveD2LEG(legRegD dst, regD src) %{
 4899   match(Set dst src);
 4900   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4901   ins_encode %{
 4902     ShouldNotReachHere();
 4903   %}
 4904   ins_pipe( fpu_reg_reg );
 4905 %}
 4906 
 4907 // Load Double
 4908 instruct MoveVL2D(regD dst, vlRegD src) %{
 4909   match(Set dst src);
 4910   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4911   ins_encode %{
 4912     ShouldNotReachHere();
 4913   %}
 4914   ins_pipe( fpu_reg_reg );
 4915 %}
 4916 
 4917 // Load Double
 4918 instruct MoveLEG2D(regD dst, legRegD src) %{
 4919   match(Set dst src);
 4920   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4921   ins_encode %{
 4922     ShouldNotReachHere();
 4923   %}
 4924   ins_pipe( fpu_reg_reg );
 4925 %}
 4926 
 4927 //----------Load/Store/Move Instructions---------------------------------------
 4928 //----------Load Instructions--------------------------------------------------
 4929 
 4930 // Load Byte (8 bit signed)
 4931 instruct loadB(rRegI dst, memory mem)
 4932 %{
 4933   match(Set dst (LoadB mem));
 4934 
 4935   ins_cost(125);
 4936   format %{ "movsbl  $dst, $mem\t# byte" %}
 4937 
 4938   ins_encode %{
 4939     __ movsbl($dst$$Register, $mem$$Address);
 4940   %}
 4941 
 4942   ins_pipe(ialu_reg_mem);
 4943 %}
 4944 
 4945 // Load Byte (8 bit signed) into Long Register
 4946 instruct loadB2L(rRegL dst, memory mem)
 4947 %{
 4948   match(Set dst (ConvI2L (LoadB mem)));
 4949 
 4950   ins_cost(125);
 4951   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 4952 
 4953   ins_encode %{
 4954     __ movsbq($dst$$Register, $mem$$Address);
 4955   %}
 4956 
 4957   ins_pipe(ialu_reg_mem);
 4958 %}
 4959 
 4960 // Load Unsigned Byte (8 bit UNsigned)
 4961 instruct loadUB(rRegI dst, memory mem)
 4962 %{
 4963   match(Set dst (LoadUB mem));
 4964 
 4965   ins_cost(125);
 4966   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 4967 
 4968   ins_encode %{
 4969     __ movzbl($dst$$Register, $mem$$Address);
 4970   %}
 4971 
 4972   ins_pipe(ialu_reg_mem);
 4973 %}
 4974 
 4975 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 4976 instruct loadUB2L(rRegL dst, memory mem)
 4977 %{
 4978   match(Set dst (ConvI2L (LoadUB mem)));
 4979 
 4980   ins_cost(125);
 4981   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 4982 
 4983   ins_encode %{
 4984     __ movzbq($dst$$Register, $mem$$Address);
 4985   %}
 4986 
 4987   ins_pipe(ialu_reg_mem);
 4988 %}
 4989 
 4990 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 4991 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 4992   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 4993   effect(KILL cr);
 4994 
 4995   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 4996             "andl    $dst, right_n_bits($mask, 8)" %}
 4997   ins_encode %{
 4998     Register Rdst = $dst$$Register;
 4999     __ movzbq(Rdst, $mem$$Address);
 5000     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5001   %}
 5002   ins_pipe(ialu_reg_mem);
 5003 %}
 5004 
 5005 // Load Short (16 bit signed)
 5006 instruct loadS(rRegI dst, memory mem)
 5007 %{
 5008   match(Set dst (LoadS mem));
 5009 
 5010   ins_cost(125);
 5011   format %{ "movswl $dst, $mem\t# short" %}
 5012 
 5013   ins_encode %{
 5014     __ movswl($dst$$Register, $mem$$Address);
 5015   %}
 5016 
 5017   ins_pipe(ialu_reg_mem);
 5018 %}
 5019 
 5020 // Load Short (16 bit signed) to Byte (8 bit signed)
 5021 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5022   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5023 
 5024   ins_cost(125);
 5025   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 5026   ins_encode %{
 5027     __ movsbl($dst$$Register, $mem$$Address);
 5028   %}
 5029   ins_pipe(ialu_reg_mem);
 5030 %}
 5031 
 5032 // Load Short (16 bit signed) into Long Register
 5033 instruct loadS2L(rRegL dst, memory mem)
 5034 %{
 5035   match(Set dst (ConvI2L (LoadS mem)));
 5036 
 5037   ins_cost(125);
 5038   format %{ "movswq $dst, $mem\t# short -> long" %}
 5039 
 5040   ins_encode %{
 5041     __ movswq($dst$$Register, $mem$$Address);
 5042   %}
 5043 
 5044   ins_pipe(ialu_reg_mem);
 5045 %}
 5046 
 5047 // Load Unsigned Short/Char (16 bit UNsigned)
 5048 instruct loadUS(rRegI dst, memory mem)
 5049 %{
 5050   match(Set dst (LoadUS mem));
 5051 
 5052   ins_cost(125);
 5053   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 5054 
 5055   ins_encode %{
 5056     __ movzwl($dst$$Register, $mem$$Address);
 5057   %}
 5058 
 5059   ins_pipe(ialu_reg_mem);
 5060 %}
 5061 
 5062 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5063 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5064   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5065 
 5066   ins_cost(125);
 5067   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 5068   ins_encode %{
 5069     __ movsbl($dst$$Register, $mem$$Address);
 5070   %}
 5071   ins_pipe(ialu_reg_mem);
 5072 %}
 5073 
 5074 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5075 instruct loadUS2L(rRegL dst, memory mem)
 5076 %{
 5077   match(Set dst (ConvI2L (LoadUS mem)));
 5078 
 5079   ins_cost(125);
 5080   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 5081 
 5082   ins_encode %{
 5083     __ movzwq($dst$$Register, $mem$$Address);
 5084   %}
 5085 
 5086   ins_pipe(ialu_reg_mem);
 5087 %}
 5088 
 5089 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5090 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 5091   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5092 
 5093   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 5094   ins_encode %{
 5095     __ movzbq($dst$$Register, $mem$$Address);
 5096   %}
 5097   ins_pipe(ialu_reg_mem);
 5098 %}
 5099 
 5100 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 5101 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 5102   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5103   effect(KILL cr);
 5104 
 5105   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5106             "andl    $dst, right_n_bits($mask, 16)" %}
 5107   ins_encode %{
 5108     Register Rdst = $dst$$Register;
 5109     __ movzwq(Rdst, $mem$$Address);
 5110     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5111   %}
 5112   ins_pipe(ialu_reg_mem);
 5113 %}
 5114 
 5115 // Load Integer
 5116 instruct loadI(rRegI dst, memory mem)
 5117 %{
 5118   match(Set dst (LoadI mem));
 5119 
 5120   ins_cost(125);
 5121   format %{ "movl    $dst, $mem\t# int" %}
 5122 
 5123   ins_encode %{
 5124     __ movl($dst$$Register, $mem$$Address);
 5125   %}
 5126 
 5127   ins_pipe(ialu_reg_mem);
 5128 %}
 5129 
 5130 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5131 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5132   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5133 
 5134   ins_cost(125);
 5135   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 5136   ins_encode %{
 5137     __ movsbl($dst$$Register, $mem$$Address);
 5138   %}
 5139   ins_pipe(ialu_reg_mem);
 5140 %}
 5141 
 5142 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5143 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5144   match(Set dst (AndI (LoadI mem) mask));
 5145 
 5146   ins_cost(125);
 5147   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 5148   ins_encode %{
 5149     __ movzbl($dst$$Register, $mem$$Address);
 5150   %}
 5151   ins_pipe(ialu_reg_mem);
 5152 %}
 5153 
 5154 // Load Integer (32 bit signed) to Short (16 bit signed)
 5155 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5156   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5157 
 5158   ins_cost(125);
 5159   format %{ "movswl  $dst, $mem\t# int -> short" %}
 5160   ins_encode %{
 5161     __ movswl($dst$$Register, $mem$$Address);
 5162   %}
 5163   ins_pipe(ialu_reg_mem);
 5164 %}
 5165 
 5166 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5167 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5168   match(Set dst (AndI (LoadI mem) mask));
 5169 
 5170   ins_cost(125);
 5171   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 5172   ins_encode %{
 5173     __ movzwl($dst$$Register, $mem$$Address);
 5174   %}
 5175   ins_pipe(ialu_reg_mem);
 5176 %}
 5177 
 5178 // Load Integer into Long Register
 5179 instruct loadI2L(rRegL dst, memory mem)
 5180 %{
 5181   match(Set dst (ConvI2L (LoadI mem)));
 5182 
 5183   ins_cost(125);
 5184   format %{ "movslq  $dst, $mem\t# int -> long" %}
 5185 
 5186   ins_encode %{
 5187     __ movslq($dst$$Register, $mem$$Address);
 5188   %}
 5189 
 5190   ins_pipe(ialu_reg_mem);
 5191 %}
 5192 
 5193 // Load Integer with mask 0xFF into Long Register
 5194 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 5195   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5196 
 5197   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 5198   ins_encode %{
 5199     __ movzbq($dst$$Register, $mem$$Address);
 5200   %}
 5201   ins_pipe(ialu_reg_mem);
 5202 %}
 5203 
 5204 // Load Integer with mask 0xFFFF into Long Register
 5205 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 5206   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5207 
 5208   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 5209   ins_encode %{
 5210     __ movzwq($dst$$Register, $mem$$Address);
 5211   %}
 5212   ins_pipe(ialu_reg_mem);
 5213 %}
 5214 
 5215 // Load Integer with a 31-bit mask into Long Register
 5216 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 5217   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5218   effect(KILL cr);
 5219 
 5220   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 5221             "andl    $dst, $mask" %}
 5222   ins_encode %{
 5223     Register Rdst = $dst$$Register;
 5224     __ movl(Rdst, $mem$$Address);
 5225     __ andl(Rdst, $mask$$constant);
 5226   %}
 5227   ins_pipe(ialu_reg_mem);
 5228 %}
 5229 
 5230 // Load Unsigned Integer into Long Register
 5231 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 5232 %{
 5233   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5234 
 5235   ins_cost(125);
 5236   format %{ "movl    $dst, $mem\t# uint -> long" %}
 5237 
 5238   ins_encode %{
 5239     __ movl($dst$$Register, $mem$$Address);
 5240   %}
 5241 
 5242   ins_pipe(ialu_reg_mem);
 5243 %}
 5244 
 5245 // Load Long
 5246 instruct loadL(rRegL dst, memory mem)
 5247 %{
 5248   match(Set dst (LoadL mem));
 5249 
 5250   ins_cost(125);
 5251   format %{ "movq    $dst, $mem\t# long" %}
 5252 
 5253   ins_encode %{
 5254     __ movq($dst$$Register, $mem$$Address);
 5255   %}
 5256 
 5257   ins_pipe(ialu_reg_mem); // XXX
 5258 %}
 5259 
 5260 // Load Range
 5261 instruct loadRange(rRegI dst, memory mem)
 5262 %{
 5263   match(Set dst (LoadRange mem));
 5264 
 5265   ins_cost(125); // XXX
 5266   format %{ "movl    $dst, $mem\t# range" %}
 5267   ins_encode %{
 5268     __ movl($dst$$Register, $mem$$Address);
 5269   %}
 5270   ins_pipe(ialu_reg_mem);
 5271 %}
 5272 
 5273 // Load Pointer
 5274 instruct loadP(rRegP dst, memory mem)
 5275 %{
 5276   match(Set dst (LoadP mem));
 5277   predicate(n->as_Load()->barrier_data() == 0);
 5278 
 5279   ins_cost(125); // XXX
 5280   format %{ "movq    $dst, $mem\t# ptr" %}
 5281   ins_encode %{
 5282     __ movq($dst$$Register, $mem$$Address);
 5283   %}
 5284   ins_pipe(ialu_reg_mem); // XXX
 5285 %}
 5286 
 5287 // Load Compressed Pointer
 5288 instruct loadN(rRegN dst, memory mem)
 5289 %{
 5290    match(Set dst (LoadN mem));
 5291 
 5292    ins_cost(125); // XXX
 5293    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 5294    ins_encode %{
 5295      __ movl($dst$$Register, $mem$$Address);
 5296    %}
 5297    ins_pipe(ialu_reg_mem); // XXX
 5298 %}
 5299 
 5300 
 5301 // Load Klass Pointer
 5302 instruct loadKlass(rRegP dst, memory mem)
 5303 %{
 5304   match(Set dst (LoadKlass mem));
 5305 
 5306   ins_cost(125); // XXX
 5307   format %{ "movq    $dst, $mem\t# class" %}
 5308   ins_encode %{
 5309     __ movq($dst$$Register, $mem$$Address);
 5310   %}
 5311   ins_pipe(ialu_reg_mem); // XXX
 5312 %}
 5313 
 5314 // Load narrow Klass Pointer
 5315 instruct loadNKlass(rRegN dst, memory mem)
 5316 %{
 5317   predicate(!UseCompactObjectHeaders);
 5318   match(Set dst (LoadNKlass mem));
 5319 
 5320   ins_cost(125); // XXX
 5321   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 5322   ins_encode %{
 5323     __ movl($dst$$Register, $mem$$Address);
 5324   %}
 5325   ins_pipe(ialu_reg_mem); // XXX
 5326 %}
 5327 
 5328 instruct loadNKlassLilliput(rRegN dst, indOffset8 mem, rFlagsReg cr)
 5329 %{
 5330   predicate(UseCompactObjectHeaders);
 5331   match(Set dst (LoadNKlass mem));
 5332   effect(KILL cr);
 5333   ins_cost(125); // XXX
 5334   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 5335   ins_encode %{
 5336     assert($mem$$disp == oopDesc::klass_offset_in_bytes(), "expect correct offset 4, but got: %d", $mem$$disp);
 5337     assert($mem$$index == 4, "expect no index register: %d", $mem$$index);
 5338     Register dst = $dst$$Register;
 5339     Register obj = $mem$$base$$Register;
 5340     C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst);
 5341     Compile::current()->output()->add_stub(stub);
 5342     __ movq(dst, Address(obj, oopDesc::mark_offset_in_bytes()));
 5343     __ testb(dst, markWord::monitor_value);
 5344     __ jcc(Assembler::notZero, stub->entry());
 5345     __ bind(stub->continuation());
 5346     __ shrq(dst, markWord::klass_shift);
 5347   %}
 5348   ins_pipe(pipe_slow); // XXX
 5349 %}
 5350 
 5351 // Load Float
 5352 instruct loadF(regF dst, memory mem)
 5353 %{
 5354   match(Set dst (LoadF mem));
 5355 
 5356   ins_cost(145); // XXX
 5357   format %{ "movss   $dst, $mem\t# float" %}
 5358   ins_encode %{
 5359     __ movflt($dst$$XMMRegister, $mem$$Address);
 5360   %}
 5361   ins_pipe(pipe_slow); // XXX
 5362 %}
 5363 
 5364 // Load Double
 5365 instruct loadD_partial(regD dst, memory mem)
 5366 %{
 5367   predicate(!UseXmmLoadAndClearUpper);
 5368   match(Set dst (LoadD mem));
 5369 
 5370   ins_cost(145); // XXX
 5371   format %{ "movlpd  $dst, $mem\t# double" %}
 5372   ins_encode %{
 5373     __ movdbl($dst$$XMMRegister, $mem$$Address);
 5374   %}
 5375   ins_pipe(pipe_slow); // XXX
 5376 %}
 5377 
 5378 instruct loadD(regD dst, memory mem)
 5379 %{
 5380   predicate(UseXmmLoadAndClearUpper);
 5381   match(Set dst (LoadD mem));
 5382 
 5383   ins_cost(145); // XXX
 5384   format %{ "movsd   $dst, $mem\t# double" %}
 5385   ins_encode %{
 5386     __ movdbl($dst$$XMMRegister, $mem$$Address);
 5387   %}
 5388   ins_pipe(pipe_slow); // XXX
 5389 %}
 5390 
 5391 
 5392 // Following pseudo code describes the algorithm for max[FD]:
 5393 // Min algorithm is on similar lines
 5394 //  btmp = (b < +0.0) ? a : b
 5395 //  atmp = (b < +0.0) ? b : a
 5396 //  Tmp  = Max_Float(atmp , btmp)
 5397 //  Res  = (atmp == NaN) ? atmp : Tmp
 5398 
 5399 // max = java.lang.Math.max(float a, float b)
 5400 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 5401   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5402   match(Set dst (MaxF a b));
 5403   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5404   format %{
 5405      "vblendvps        $btmp,$b,$a,$b           \n\t"
 5406      "vblendvps        $atmp,$a,$b,$b           \n\t"
 5407      "vmaxss           $tmp,$atmp,$btmp         \n\t"
 5408      "vcmpps.unordered $btmp,$atmp,$atmp        \n\t"
 5409      "vblendvps        $dst,$tmp,$atmp,$btmp    \n\t"
 5410   %}
 5411   ins_encode %{
 5412     int vector_len = Assembler::AVX_128bit;
 5413     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
 5414     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
 5415     __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5416     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
 5417     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5418  %}
 5419   ins_pipe( pipe_slow );
 5420 %}
 5421 
 5422 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
 5423   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5424   match(Set dst (MaxF a b));
 5425   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5426 
 5427   format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
 5428   ins_encode %{
 5429     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5430                     false /*min*/, true /*single*/);
 5431   %}
 5432   ins_pipe( pipe_slow );
 5433 %}
 5434 
 5435 // max = java.lang.Math.max(double a, double b)
 5436 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 5437   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5438   match(Set dst (MaxD a b));
 5439   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 5440   format %{
 5441      "vblendvpd        $btmp,$b,$a,$b            \n\t"
 5442      "vblendvpd        $atmp,$a,$b,$b            \n\t"
 5443      "vmaxsd           $tmp,$atmp,$btmp          \n\t"
 5444      "vcmppd.unordered $btmp,$atmp,$atmp         \n\t"
 5445      "vblendvpd        $dst,$tmp,$atmp,$btmp     \n\t"
 5446   %}
 5447   ins_encode %{
 5448     int vector_len = Assembler::AVX_128bit;
 5449     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
 5450     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
 5451     __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5452     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
 5453     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5454   %}
 5455   ins_pipe( pipe_slow );
 5456 %}
 5457 
 5458 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
 5459   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5460   match(Set dst (MaxD a b));
 5461   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5462 
 5463   format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
 5464   ins_encode %{
 5465     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5466                     false /*min*/, false /*single*/);
 5467   %}
 5468   ins_pipe( pipe_slow );
 5469 %}
 5470 
 5471 // min = java.lang.Math.min(float a, float b)
 5472 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 5473   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5474   match(Set dst (MinF a b));
 5475   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5476   format %{
 5477      "vblendvps        $atmp,$a,$b,$a             \n\t"
 5478      "vblendvps        $btmp,$b,$a,$a             \n\t"
 5479      "vminss           $tmp,$atmp,$btmp           \n\t"
 5480      "vcmpps.unordered $btmp,$atmp,$atmp          \n\t"
 5481      "vblendvps        $dst,$tmp,$atmp,$btmp      \n\t"
 5482   %}
 5483   ins_encode %{
 5484     int vector_len = Assembler::AVX_128bit;
 5485     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
 5486     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
 5487     __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5488     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
 5489     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5490   %}
 5491   ins_pipe( pipe_slow );
 5492 %}
 5493 
 5494 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
 5495   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5496   match(Set dst (MinF a b));
 5497   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5498 
 5499   format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
 5500   ins_encode %{
 5501     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5502                     true /*min*/, true /*single*/);
 5503   %}
 5504   ins_pipe( pipe_slow );
 5505 %}
 5506 
 5507 // min = java.lang.Math.min(double a, double b)
 5508 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 5509   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 5510   match(Set dst (MinD a b));
 5511   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5512   format %{
 5513      "vblendvpd        $atmp,$a,$b,$a           \n\t"
 5514      "vblendvpd        $btmp,$b,$a,$a           \n\t"
 5515      "vminsd           $tmp,$atmp,$btmp         \n\t"
 5516      "vcmppd.unordered $btmp,$atmp,$atmp        \n\t"
 5517      "vblendvpd        $dst,$tmp,$atmp,$btmp    \n\t"
 5518   %}
 5519   ins_encode %{
 5520     int vector_len = Assembler::AVX_128bit;
 5521     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
 5522     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
 5523     __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5524     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
 5525     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5526   %}
 5527   ins_pipe( pipe_slow );
 5528 %}
 5529 
 5530 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
 5531   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 5532   match(Set dst (MinD a b));
 5533   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5534 
 5535   format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
 5536   ins_encode %{
 5537     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5538                     true /*min*/, false /*single*/);
 5539   %}
 5540   ins_pipe( pipe_slow );
 5541 %}
 5542 
 5543 // Load Effective Address
 5544 instruct leaP8(rRegP dst, indOffset8 mem)
 5545 %{
 5546   match(Set dst mem);
 5547 
 5548   ins_cost(110); // XXX
 5549   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 5550   ins_encode %{
 5551     __ leaq($dst$$Register, $mem$$Address);
 5552   %}
 5553   ins_pipe(ialu_reg_reg_fat);
 5554 %}
 5555 
 5556 instruct leaP32(rRegP dst, indOffset32 mem)
 5557 %{
 5558   match(Set dst mem);
 5559 
 5560   ins_cost(110);
 5561   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 5562   ins_encode %{
 5563     __ leaq($dst$$Register, $mem$$Address);
 5564   %}
 5565   ins_pipe(ialu_reg_reg_fat);
 5566 %}
 5567 
 5568 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 5569 %{
 5570   match(Set dst mem);
 5571 
 5572   ins_cost(110);
 5573   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 5574   ins_encode %{
 5575     __ leaq($dst$$Register, $mem$$Address);
 5576   %}
 5577   ins_pipe(ialu_reg_reg_fat);
 5578 %}
 5579 
 5580 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 5581 %{
 5582   match(Set dst mem);
 5583 
 5584   ins_cost(110);
 5585   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 5586   ins_encode %{
 5587     __ leaq($dst$$Register, $mem$$Address);
 5588   %}
 5589   ins_pipe(ialu_reg_reg_fat);
 5590 %}
 5591 
 5592 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 5593 %{
 5594   match(Set dst mem);
 5595 
 5596   ins_cost(110);
 5597   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 5598   ins_encode %{
 5599     __ leaq($dst$$Register, $mem$$Address);
 5600   %}
 5601   ins_pipe(ialu_reg_reg_fat);
 5602 %}
 5603 
 5604 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 5605 %{
 5606   match(Set dst mem);
 5607 
 5608   ins_cost(110);
 5609   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 5610   ins_encode %{
 5611     __ leaq($dst$$Register, $mem$$Address);
 5612   %}
 5613   ins_pipe(ialu_reg_reg_fat);
 5614 %}
 5615 
 5616 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 5617 %{
 5618   match(Set dst mem);
 5619 
 5620   ins_cost(110);
 5621   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 5622   ins_encode %{
 5623     __ leaq($dst$$Register, $mem$$Address);
 5624   %}
 5625   ins_pipe(ialu_reg_reg_fat);
 5626 %}
 5627 
 5628 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 5629 %{
 5630   match(Set dst mem);
 5631 
 5632   ins_cost(110);
 5633   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 5634   ins_encode %{
 5635     __ leaq($dst$$Register, $mem$$Address);
 5636   %}
 5637   ins_pipe(ialu_reg_reg_fat);
 5638 %}
 5639 
 5640 // Load Effective Address which uses Narrow (32-bits) oop
 5641 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 5642 %{
 5643   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 5644   match(Set dst mem);
 5645 
 5646   ins_cost(110);
 5647   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 5648   ins_encode %{
 5649     __ leaq($dst$$Register, $mem$$Address);
 5650   %}
 5651   ins_pipe(ialu_reg_reg_fat);
 5652 %}
 5653 
 5654 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 5655 %{
 5656   predicate(CompressedOops::shift() == 0);
 5657   match(Set dst mem);
 5658 
 5659   ins_cost(110); // XXX
 5660   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 5661   ins_encode %{
 5662     __ leaq($dst$$Register, $mem$$Address);
 5663   %}
 5664   ins_pipe(ialu_reg_reg_fat);
 5665 %}
 5666 
 5667 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 5668 %{
 5669   predicate(CompressedOops::shift() == 0);
 5670   match(Set dst mem);
 5671 
 5672   ins_cost(110);
 5673   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 5674   ins_encode %{
 5675     __ leaq($dst$$Register, $mem$$Address);
 5676   %}
 5677   ins_pipe(ialu_reg_reg_fat);
 5678 %}
 5679 
 5680 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 5681 %{
 5682   predicate(CompressedOops::shift() == 0);
 5683   match(Set dst mem);
 5684 
 5685   ins_cost(110);
 5686   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 5687   ins_encode %{
 5688     __ leaq($dst$$Register, $mem$$Address);
 5689   %}
 5690   ins_pipe(ialu_reg_reg_fat);
 5691 %}
 5692 
 5693 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 5694 %{
 5695   predicate(CompressedOops::shift() == 0);
 5696   match(Set dst mem);
 5697 
 5698   ins_cost(110);
 5699   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 5700   ins_encode %{
 5701     __ leaq($dst$$Register, $mem$$Address);
 5702   %}
 5703   ins_pipe(ialu_reg_reg_fat);
 5704 %}
 5705 
 5706 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 5707 %{
 5708   predicate(CompressedOops::shift() == 0);
 5709   match(Set dst mem);
 5710 
 5711   ins_cost(110);
 5712   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 5713   ins_encode %{
 5714     __ leaq($dst$$Register, $mem$$Address);
 5715   %}
 5716   ins_pipe(ialu_reg_reg_fat);
 5717 %}
 5718 
 5719 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 5720 %{
 5721   predicate(CompressedOops::shift() == 0);
 5722   match(Set dst mem);
 5723 
 5724   ins_cost(110);
 5725   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 5726   ins_encode %{
 5727     __ leaq($dst$$Register, $mem$$Address);
 5728   %}
 5729   ins_pipe(ialu_reg_reg_fat);
 5730 %}
 5731 
 5732 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 5733 %{
 5734   predicate(CompressedOops::shift() == 0);
 5735   match(Set dst mem);
 5736 
 5737   ins_cost(110);
 5738   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 5739   ins_encode %{
 5740     __ leaq($dst$$Register, $mem$$Address);
 5741   %}
 5742   ins_pipe(ialu_reg_reg_fat);
 5743 %}
 5744 
 5745 instruct loadConI(rRegI dst, immI src)
 5746 %{
 5747   match(Set dst src);
 5748 
 5749   format %{ "movl    $dst, $src\t# int" %}
 5750   ins_encode %{
 5751     __ movl($dst$$Register, $src$$constant);
 5752   %}
 5753   ins_pipe(ialu_reg_fat); // XXX
 5754 %}
 5755 
 5756 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 5757 %{
 5758   match(Set dst src);
 5759   effect(KILL cr);
 5760 
 5761   ins_cost(50);
 5762   format %{ "xorl    $dst, $dst\t# int" %}
 5763   ins_encode %{
 5764     __ xorl($dst$$Register, $dst$$Register);
 5765   %}
 5766   ins_pipe(ialu_reg);
 5767 %}
 5768 
 5769 instruct loadConL(rRegL dst, immL src)
 5770 %{
 5771   match(Set dst src);
 5772 
 5773   ins_cost(150);
 5774   format %{ "movq    $dst, $src\t# long" %}
 5775   ins_encode %{
 5776     __ mov64($dst$$Register, $src$$constant);
 5777   %}
 5778   ins_pipe(ialu_reg);
 5779 %}
 5780 
 5781 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 5782 %{
 5783   match(Set dst src);
 5784   effect(KILL cr);
 5785 
 5786   ins_cost(50);
 5787   format %{ "xorl    $dst, $dst\t# long" %}
 5788   ins_encode %{
 5789     __ xorl($dst$$Register, $dst$$Register);
 5790   %}
 5791   ins_pipe(ialu_reg); // XXX
 5792 %}
 5793 
 5794 instruct loadConUL32(rRegL dst, immUL32 src)
 5795 %{
 5796   match(Set dst src);
 5797 
 5798   ins_cost(60);
 5799   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 5800   ins_encode %{
 5801     __ movl($dst$$Register, $src$$constant);
 5802   %}
 5803   ins_pipe(ialu_reg);
 5804 %}
 5805 
 5806 instruct loadConL32(rRegL dst, immL32 src)
 5807 %{
 5808   match(Set dst src);
 5809 
 5810   ins_cost(70);
 5811   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 5812   ins_encode %{
 5813     __ movq($dst$$Register, $src$$constant);
 5814   %}
 5815   ins_pipe(ialu_reg);
 5816 %}
 5817 
 5818 instruct loadConP(rRegP dst, immP con) %{
 5819   match(Set dst con);
 5820 
 5821   format %{ "movq    $dst, $con\t# ptr" %}
 5822   ins_encode %{
 5823     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 5824   %}
 5825   ins_pipe(ialu_reg_fat); // XXX
 5826 %}
 5827 
 5828 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 5829 %{
 5830   match(Set dst src);
 5831   effect(KILL cr);
 5832 
 5833   ins_cost(50);
 5834   format %{ "xorl    $dst, $dst\t# ptr" %}
 5835   ins_encode %{
 5836     __ xorl($dst$$Register, $dst$$Register);
 5837   %}
 5838   ins_pipe(ialu_reg);
 5839 %}
 5840 
 5841 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 5842 %{
 5843   match(Set dst src);
 5844   effect(KILL cr);
 5845 
 5846   ins_cost(60);
 5847   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 5848   ins_encode %{
 5849     __ movl($dst$$Register, $src$$constant);
 5850   %}
 5851   ins_pipe(ialu_reg);
 5852 %}
 5853 
 5854 instruct loadConF(regF dst, immF con) %{
 5855   match(Set dst con);
 5856   ins_cost(125);
 5857   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 5858   ins_encode %{
 5859     __ movflt($dst$$XMMRegister, $constantaddress($con));
 5860   %}
 5861   ins_pipe(pipe_slow);
 5862 %}
 5863 
 5864 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 5865   match(Set dst src);
 5866   effect(KILL cr);
 5867   format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
 5868   ins_encode %{
 5869     __ xorq($dst$$Register, $dst$$Register);
 5870   %}
 5871   ins_pipe(ialu_reg);
 5872 %}
 5873 
 5874 instruct loadConN(rRegN dst, immN src) %{
 5875   match(Set dst src);
 5876 
 5877   ins_cost(125);
 5878   format %{ "movl    $dst, $src\t# compressed ptr" %}
 5879   ins_encode %{
 5880     address con = (address)$src$$constant;
 5881     if (con == NULL) {
 5882       ShouldNotReachHere();
 5883     } else {
 5884       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 5885     }
 5886   %}
 5887   ins_pipe(ialu_reg_fat); // XXX
 5888 %}
 5889 
 5890 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 5891   match(Set dst src);
 5892 
 5893   ins_cost(125);
 5894   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 5895   ins_encode %{
 5896     address con = (address)$src$$constant;
 5897     if (con == NULL) {
 5898       ShouldNotReachHere();
 5899     } else {
 5900       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 5901     }
 5902   %}
 5903   ins_pipe(ialu_reg_fat); // XXX
 5904 %}
 5905 
 5906 instruct loadConF0(regF dst, immF0 src)
 5907 %{
 5908   match(Set dst src);
 5909   ins_cost(100);
 5910 
 5911   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 5912   ins_encode %{
 5913     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 5914   %}
 5915   ins_pipe(pipe_slow);
 5916 %}
 5917 
 5918 // Use the same format since predicate() can not be used here.
 5919 instruct loadConD(regD dst, immD con) %{
 5920   match(Set dst con);
 5921   ins_cost(125);
 5922   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 5923   ins_encode %{
 5924     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 5925   %}
 5926   ins_pipe(pipe_slow);
 5927 %}
 5928 
 5929 instruct loadConD0(regD dst, immD0 src)
 5930 %{
 5931   match(Set dst src);
 5932   ins_cost(100);
 5933 
 5934   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 5935   ins_encode %{
 5936     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 5937   %}
 5938   ins_pipe(pipe_slow);
 5939 %}
 5940 
// Load an int from a stack slot into a general register (printed as movl).
// Uses the older opcode/enc_class encoding style: primary opcode 0x8B combined
// with the REX_reg_mem, OpcP and reg_mem encoding classes, which are defined
// outside this section (presumably OpcP emits the primary opcode -- confirm).
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
 5951 
// Load a long from a stack slot into a general register (printed as movq).
// Uses the older opcode/enc_class encoding style: primary opcode 0x8B combined
// with the REX_reg_mem_wide, OpcP and reg_mem encoding classes, which are
// defined outside this section (presumably the _wide variant selects the
// 64-bit operand size -- confirm).
instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
 5962 
// Load a pointer from a stack slot into a general register (printed as movq).
// Uses the older opcode/enc_class encoding style: primary opcode 0x8B combined
// with the REX_reg_mem_wide, OpcP and reg_mem encoding classes, which are
// defined outside this section.
instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
 5973 
 5974 instruct loadSSF(regF dst, stackSlotF src)
 5975 %{
 5976   match(Set dst src);
 5977 
 5978   ins_cost(125);
 5979   format %{ "movss   $dst, $src\t# float stk" %}
 5980   ins_encode %{
 5981     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 5982   %}
 5983   ins_pipe(pipe_slow); // XXX
 5984 %}
 5985 
 5986 // Use the same format since predicate() can not be used here.
 5987 instruct loadSSD(regD dst, stackSlotD src)
 5988 %{
 5989   match(Set dst src);
 5990 
 5991   ins_cost(125);
 5992   format %{ "movsd   $dst, $src\t# double stk" %}
 5993   ins_encode  %{
 5994     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 5995   %}
 5996   ins_pipe(pipe_slow); // XXX
 5997 %}
 5998 
 5999 // Prefetch instructions for allocation.
 6000 // Must be safe to execute with invalid address (cannot fault).
 6001 
 6002 instruct prefetchAlloc( memory mem ) %{
 6003   predicate(AllocatePrefetchInstr==3);
 6004   match(PrefetchAllocation mem);
 6005   ins_cost(125);
 6006 
 6007   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 6008   ins_encode %{
 6009     __ prefetchw($mem$$Address);
 6010   %}
 6011   ins_pipe(ialu_mem);
 6012 %}
 6013 
 6014 instruct prefetchAllocNTA( memory mem ) %{
 6015   predicate(AllocatePrefetchInstr==0);
 6016   match(PrefetchAllocation mem);
 6017   ins_cost(125);
 6018 
 6019   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 6020   ins_encode %{
 6021     __ prefetchnta($mem$$Address);
 6022   %}
 6023   ins_pipe(ialu_mem);
 6024 %}
 6025 
 6026 instruct prefetchAllocT0( memory mem ) %{
 6027   predicate(AllocatePrefetchInstr==1);
 6028   match(PrefetchAllocation mem);
 6029   ins_cost(125);
 6030 
 6031   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 6032   ins_encode %{
 6033     __ prefetcht0($mem$$Address);
 6034   %}
 6035   ins_pipe(ialu_mem);
 6036 %}
 6037 
 6038 instruct prefetchAllocT2( memory mem ) %{
 6039   predicate(AllocatePrefetchInstr==2);
 6040   match(PrefetchAllocation mem);
 6041   ins_cost(125);
 6042 
 6043   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 6044   ins_encode %{
 6045     __ prefetcht2($mem$$Address);
 6046   %}
 6047   ins_pipe(ialu_mem);
 6048 %}
 6049 
 6050 //----------Store Instructions-------------------------------------------------
 6051 
 6052 // Store Byte
 6053 instruct storeB(memory mem, rRegI src)
 6054 %{
 6055   match(Set mem (StoreB mem src));
 6056 
 6057   ins_cost(125); // XXX
 6058   format %{ "movb    $mem, $src\t# byte" %}
 6059   ins_encode %{
 6060     __ movb($mem$$Address, $src$$Register);
 6061   %}
 6062   ins_pipe(ialu_mem_reg);
 6063 %}
 6064 
 6065 // Store Char/Short
 6066 instruct storeC(memory mem, rRegI src)
 6067 %{
 6068   match(Set mem (StoreC mem src));
 6069 
 6070   ins_cost(125); // XXX
 6071   format %{ "movw    $mem, $src\t# char/short" %}
 6072   ins_encode %{
 6073     __ movw($mem$$Address, $src$$Register);
 6074   %}
 6075   ins_pipe(ialu_mem_reg);
 6076 %}
 6077 
 6078 // Store Integer
 6079 instruct storeI(memory mem, rRegI src)
 6080 %{
 6081   match(Set mem (StoreI mem src));
 6082 
 6083   ins_cost(125); // XXX
 6084   format %{ "movl    $mem, $src\t# int" %}
 6085   ins_encode %{
 6086     __ movl($mem$$Address, $src$$Register);
 6087   %}
 6088   ins_pipe(ialu_mem_reg);
 6089 %}
 6090 
 6091 // Store Long
 6092 instruct storeL(memory mem, rRegL src)
 6093 %{
 6094   match(Set mem (StoreL mem src));
 6095 
 6096   ins_cost(125); // XXX
 6097   format %{ "movq    $mem, $src\t# long" %}
 6098   ins_encode %{
 6099     __ movq($mem$$Address, $src$$Register);
 6100   %}
 6101   ins_pipe(ialu_mem_reg); // XXX
 6102 %}
 6103 
 6104 // Store Pointer
 6105 instruct storeP(memory mem, any_RegP src)
 6106 %{
 6107   predicate(n->as_Store()->barrier_data() == 0);
 6108   match(Set mem (StoreP mem src));
 6109 
 6110   ins_cost(125); // XXX
 6111   format %{ "movq    $mem, $src\t# ptr" %}
 6112   ins_encode %{
 6113     __ movq($mem$$Address, $src$$Register);
 6114   %}
 6115   ins_pipe(ialu_mem_reg);
 6116 %}
 6117 
 6118 instruct storeImmP0(memory mem, immP0 zero)
 6119 %{
 6120   predicate(UseCompressedOops && (CompressedOops::base() == NULL) && n->as_Store()->barrier_data() == 0);
 6121   match(Set mem (StoreP mem zero));
 6122 
 6123   ins_cost(125); // XXX
 6124   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 6125   ins_encode %{
 6126     __ movq($mem$$Address, r12);
 6127   %}
 6128   ins_pipe(ialu_mem_reg);
 6129 %}
 6130 
 6131 // Store NULL Pointer, mark word, or other simple pointer constant.
 6132 instruct storeImmP(memory mem, immP31 src)
 6133 %{
 6134   predicate(n->as_Store()->barrier_data() == 0);
 6135   match(Set mem (StoreP mem src));
 6136 
 6137   ins_cost(150); // XXX
 6138   format %{ "movq    $mem, $src\t# ptr" %}
 6139   ins_encode %{
 6140     __ movq($mem$$Address, $src$$constant);
 6141   %}
 6142   ins_pipe(ialu_mem_imm);
 6143 %}
 6144 
 6145 // Store Compressed Pointer
 6146 instruct storeN(memory mem, rRegN src)
 6147 %{
 6148   match(Set mem (StoreN mem src));
 6149 
 6150   ins_cost(125); // XXX
 6151   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6152   ins_encode %{
 6153     __ movl($mem$$Address, $src$$Register);
 6154   %}
 6155   ins_pipe(ialu_mem_reg);
 6156 %}
 6157 
 6158 instruct storeNKlass(memory mem, rRegN src)
 6159 %{
 6160   match(Set mem (StoreNKlass mem src));
 6161 
 6162   ins_cost(125); // XXX
 6163   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6164   ins_encode %{
 6165     __ movl($mem$$Address, $src$$Register);
 6166   %}
 6167   ins_pipe(ialu_mem_reg);
 6168 %}
 6169 
 6170 instruct storeImmN0(memory mem, immN0 zero)
 6171 %{
 6172   predicate(CompressedOops::base() == NULL);
 6173   match(Set mem (StoreN mem zero));
 6174 
 6175   ins_cost(125); // XXX
 6176   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 6177   ins_encode %{
 6178     __ movl($mem$$Address, r12);
 6179   %}
 6180   ins_pipe(ialu_mem_reg);
 6181 %}
 6182 
 6183 instruct storeImmN(memory mem, immN src)
 6184 %{
 6185   match(Set mem (StoreN mem src));
 6186 
 6187   ins_cost(150); // XXX
 6188   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6189   ins_encode %{
 6190     address con = (address)$src$$constant;
 6191     if (con == NULL) {
 6192       __ movl($mem$$Address, 0);
 6193     } else {
 6194       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 6195     }
 6196   %}
 6197   ins_pipe(ialu_mem_imm);
 6198 %}
 6199 
 6200 instruct storeImmNKlass(memory mem, immNKlass src)
 6201 %{
 6202   match(Set mem (StoreNKlass mem src));
 6203 
 6204   ins_cost(150); // XXX
 6205   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6206   ins_encode %{
 6207     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 6208   %}
 6209   ins_pipe(ialu_mem_imm);
 6210 %}
 6211 
 6212 // Store Integer Immediate
 6213 instruct storeImmI0(memory mem, immI_0 zero)
 6214 %{
 6215   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6216   match(Set mem (StoreI mem zero));
 6217 
 6218   ins_cost(125); // XXX
 6219   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 6220   ins_encode %{
 6221     __ movl($mem$$Address, r12);
 6222   %}
 6223   ins_pipe(ialu_mem_reg);
 6224 %}
 6225 
 6226 instruct storeImmI(memory mem, immI src)
 6227 %{
 6228   match(Set mem (StoreI mem src));
 6229 
 6230   ins_cost(150);
 6231   format %{ "movl    $mem, $src\t# int" %}
 6232   ins_encode %{
 6233     __ movl($mem$$Address, $src$$constant);
 6234   %}
 6235   ins_pipe(ialu_mem_imm);
 6236 %}
 6237 
 6238 // Store Long Immediate
 6239 instruct storeImmL0(memory mem, immL0 zero)
 6240 %{
 6241   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6242   match(Set mem (StoreL mem zero));
 6243 
 6244   ins_cost(125); // XXX
 6245   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 6246   ins_encode %{
 6247     __ movq($mem$$Address, r12);
 6248   %}
 6249   ins_pipe(ialu_mem_reg);
 6250 %}
 6251 
 6252 instruct storeImmL(memory mem, immL32 src)
 6253 %{
 6254   match(Set mem (StoreL mem src));
 6255 
 6256   ins_cost(150);
 6257   format %{ "movq    $mem, $src\t# long" %}
 6258   ins_encode %{
 6259     __ movq($mem$$Address, $src$$constant);
 6260   %}
 6261   ins_pipe(ialu_mem_imm);
 6262 %}
 6263 
 6264 // Store Short/Char Immediate
 6265 instruct storeImmC0(memory mem, immI_0 zero)
 6266 %{
 6267   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6268   match(Set mem (StoreC mem zero));
 6269 
 6270   ins_cost(125); // XXX
 6271   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 6272   ins_encode %{
 6273     __ movw($mem$$Address, r12);
 6274   %}
 6275   ins_pipe(ialu_mem_reg);
 6276 %}
 6277 
 6278 instruct storeImmI16(memory mem, immI16 src)
 6279 %{
 6280   predicate(UseStoreImmI16);
 6281   match(Set mem (StoreC mem src));
 6282 
 6283   ins_cost(150);
 6284   format %{ "movw    $mem, $src\t# short/char" %}
 6285   ins_encode %{
 6286     __ movw($mem$$Address, $src$$constant);
 6287   %}
 6288   ins_pipe(ialu_mem_imm);
 6289 %}
 6290 
 6291 // Store Byte Immediate
 6292 instruct storeImmB0(memory mem, immI_0 zero)
 6293 %{
 6294   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6295   match(Set mem (StoreB mem zero));
 6296 
 6297   ins_cost(125); // XXX
 6298   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 6299   ins_encode %{
 6300     __ movb($mem$$Address, r12);
 6301   %}
 6302   ins_pipe(ialu_mem_reg);
 6303 %}
 6304 
 6305 instruct storeImmB(memory mem, immI8 src)
 6306 %{
 6307   match(Set mem (StoreB mem src));
 6308 
 6309   ins_cost(150); // XXX
 6310   format %{ "movb    $mem, $src\t# byte" %}
 6311   ins_encode %{
 6312     __ movb($mem$$Address, $src$$constant);
 6313   %}
 6314   ins_pipe(ialu_mem_imm);
 6315 %}
 6316 
 6317 // Store CMS card-mark Immediate
 6318 instruct storeImmCM0_reg(memory mem, immI_0 zero)
 6319 %{
 6320   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6321   match(Set mem (StoreCM mem zero));
 6322 
 6323   ins_cost(125); // XXX
 6324   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
 6325   ins_encode %{
 6326     __ movb($mem$$Address, r12);
 6327   %}
 6328   ins_pipe(ialu_mem_reg);
 6329 %}
 6330 
 6331 instruct storeImmCM0(memory mem, immI_0 src)
 6332 %{
 6333   match(Set mem (StoreCM mem src));
 6334 
 6335   ins_cost(150); // XXX
 6336   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
 6337   ins_encode %{
 6338     __ movb($mem$$Address, $src$$constant);
 6339   %}
 6340   ins_pipe(ialu_mem_imm);
 6341 %}
 6342 
 6343 // Store Float
 6344 instruct storeF(memory mem, regF src)
 6345 %{
 6346   match(Set mem (StoreF mem src));
 6347 
 6348   ins_cost(95); // XXX
 6349   format %{ "movss   $mem, $src\t# float" %}
 6350   ins_encode %{
 6351     __ movflt($mem$$Address, $src$$XMMRegister);
 6352   %}
 6353   ins_pipe(pipe_slow); // XXX
 6354 %}
 6355 
 6356 // Store immediate Float value (it is faster than store from XMM register)
 6357 instruct storeF0(memory mem, immF0 zero)
 6358 %{
 6359   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6360   match(Set mem (StoreF mem zero));
 6361 
 6362   ins_cost(25); // XXX
 6363   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 6364   ins_encode %{
 6365     __ movl($mem$$Address, r12);
 6366   %}
 6367   ins_pipe(ialu_mem_reg);
 6368 %}
 6369 
 6370 instruct storeF_imm(memory mem, immF src)
 6371 %{
 6372   match(Set mem (StoreF mem src));
 6373 
 6374   ins_cost(50);
 6375   format %{ "movl    $mem, $src\t# float" %}
 6376   ins_encode %{
 6377     __ movl($mem$$Address, jint_cast($src$$constant));
 6378   %}
 6379   ins_pipe(ialu_mem_imm);
 6380 %}
 6381 
 6382 // Store Double
 6383 instruct storeD(memory mem, regD src)
 6384 %{
 6385   match(Set mem (StoreD mem src));
 6386 
 6387   ins_cost(95); // XXX
 6388   format %{ "movsd   $mem, $src\t# double" %}
 6389   ins_encode %{
 6390     __ movdbl($mem$$Address, $src$$XMMRegister);
 6391   %}
 6392   ins_pipe(pipe_slow); // XXX
 6393 %}
 6394 
 6395 // Store immediate double 0.0 (it is faster than store from XMM register)
 6396 instruct storeD0_imm(memory mem, immD0 src)
 6397 %{
 6398   predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
 6399   match(Set mem (StoreD mem src));
 6400 
 6401   ins_cost(50);
 6402   format %{ "movq    $mem, $src\t# double 0." %}
 6403   ins_encode %{
 6404     __ movq($mem$$Address, $src$$constant);
 6405   %}
 6406   ins_pipe(ialu_mem_imm);
 6407 %}
 6408 
 6409 instruct storeD0(memory mem, immD0 zero)
 6410 %{
 6411   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6412   match(Set mem (StoreD mem zero));
 6413 
 6414   ins_cost(25); // XXX
 6415   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 6416   ins_encode %{
 6417     __ movq($mem$$Address, r12);
 6418   %}
 6419   ins_pipe(ialu_mem_reg);
 6420 %}
 6421 
// Store an int register into a stack slot (printed as movl).
// Uses the older opcode/enc_class encoding style: primary opcode 0x89 combined
// with the REX_reg_mem, OpcP and reg_mem encoding classes, which are defined
// outside this section. Note the enc_class arguments are (src, dst): the
// register operand comes first, the stack slot second.
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe( ialu_mem_reg );
%}
 6432 
// Store a long register into a stack slot (printed as movq).
// Uses the older opcode/enc_class encoding style: primary opcode 0x89 combined
// with the REX_reg_mem_wide, OpcP and reg_mem encoding classes, which are
// defined outside this section. The enc_class arguments are (src, dst): the
// register operand comes first, the stack slot second.
instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
 6443 
// Store a pointer register into a stack slot (printed as movq).
// Uses the older opcode/enc_class encoding style: primary opcode 0x89 combined
// with the REX_reg_mem_wide, OpcP and reg_mem encoding classes, which are
// defined outside this section. The enc_class arguments are (src, dst): the
// register operand comes first, the stack slot second.
instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
 6454 
 6455 instruct storeSSF(stackSlotF dst, regF src)
 6456 %{
 6457   match(Set dst src);
 6458 
 6459   ins_cost(95); // XXX
 6460   format %{ "movss   $dst, $src\t# float stk" %}
 6461   ins_encode %{
 6462     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 6463   %}
 6464   ins_pipe(pipe_slow); // XXX
 6465 %}
 6466 
 6467 instruct storeSSD(stackSlotD dst, regD src)
 6468 %{
 6469   match(Set dst src);
 6470 
 6471   ins_cost(95); // XXX
 6472   format %{ "movsd   $dst, $src\t# double stk" %}
 6473   ins_encode %{
 6474     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 6475   %}
 6476   ins_pipe(pipe_slow); // XXX
 6477 %}
 6478 
 6479 instruct cacheWB(indirect addr)
 6480 %{
 6481   predicate(VM_Version::supports_data_cache_line_flush());
 6482   match(CacheWB addr);
 6483 
 6484   ins_cost(100);
 6485   format %{"cache wb $addr" %}
 6486   ins_encode %{
 6487     assert($addr->index_position() < 0, "should be");
 6488     assert($addr$$disp == 0, "should be");
 6489     __ cache_wb(Address($addr$$base$$Register, 0));
 6490   %}
 6491   ins_pipe(pipe_slow); // XXX
 6492 %}
 6493 
 6494 instruct cacheWBPreSync()
 6495 %{
 6496   predicate(VM_Version::supports_data_cache_line_flush());
 6497   match(CacheWBPreSync);
 6498 
 6499   ins_cost(100);
 6500   format %{"cache wb presync" %}
 6501   ins_encode %{
 6502     __ cache_wbsync(true);
 6503   %}
 6504   ins_pipe(pipe_slow); // XXX
 6505 %}
 6506 
 6507 instruct cacheWBPostSync()
 6508 %{
 6509   predicate(VM_Version::supports_data_cache_line_flush());
 6510   match(CacheWBPostSync);
 6511 
 6512   ins_cost(100);
 6513   format %{"cache wb postsync" %}
 6514   ins_encode %{
 6515     __ cache_wbsync(false);
 6516   %}
 6517   ins_pipe(pipe_slow); // XXX
 6518 %}
 6519 
 6520 //----------BSWAP Instructions-------------------------------------------------
 6521 instruct bytes_reverse_int(rRegI dst) %{
 6522   match(Set dst (ReverseBytesI dst));
 6523 
 6524   format %{ "bswapl  $dst" %}
 6525   ins_encode %{
 6526     __ bswapl($dst$$Register);
 6527   %}
 6528   ins_pipe( ialu_reg );
 6529 %}
 6530 
 6531 instruct bytes_reverse_long(rRegL dst) %{
 6532   match(Set dst (ReverseBytesL dst));
 6533 
 6534   format %{ "bswapq  $dst" %}
 6535   ins_encode %{
 6536     __ bswapq($dst$$Register);
 6537   %}
 6538   ins_pipe( ialu_reg);
 6539 %}
 6540 
 6541 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 6542   match(Set dst (ReverseBytesUS dst));
 6543   effect(KILL cr);
 6544 
 6545   format %{ "bswapl  $dst\n\t"
 6546             "shrl    $dst,16\n\t" %}
 6547   ins_encode %{
 6548     __ bswapl($dst$$Register);
 6549     __ shrl($dst$$Register, 16);
 6550   %}
 6551   ins_pipe( ialu_reg );
 6552 %}
 6553 
 6554 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 6555   match(Set dst (ReverseBytesS dst));
 6556   effect(KILL cr);
 6557 
 6558   format %{ "bswapl  $dst\n\t"
 6559             "sar     $dst,16\n\t" %}
 6560   ins_encode %{
 6561     __ bswapl($dst$$Register);
 6562     __ sarl($dst$$Register, 16);
 6563   %}
 6564   ins_pipe( ialu_reg );
 6565 %}
 6566 
 6567 //---------- Zeros Count Instructions ------------------------------------------
 6568 
 6569 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 6570   predicate(UseCountLeadingZerosInstruction);
 6571   match(Set dst (CountLeadingZerosI src));
 6572   effect(KILL cr);
 6573 
 6574   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 6575   ins_encode %{
 6576     __ lzcntl($dst$$Register, $src$$Register);
 6577   %}
 6578   ins_pipe(ialu_reg);
 6579 %}
 6580 
 6581 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 6582   predicate(UseCountLeadingZerosInstruction);
 6583   match(Set dst (CountLeadingZerosI (LoadI src)));
 6584   effect(KILL cr);
 6585   ins_cost(175);
 6586   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 6587   ins_encode %{
 6588     __ lzcntl($dst$$Register, $src$$Address);
 6589   %}
 6590   ins_pipe(ialu_reg_mem);
 6591 %}
 6592 
 6593 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 6594   predicate(!UseCountLeadingZerosInstruction);
 6595   match(Set dst (CountLeadingZerosI src));
 6596   effect(KILL cr);
 6597 
 6598   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 6599             "jnz     skip\n\t"
 6600             "movl    $dst, -1\n"
 6601       "skip:\n\t"
 6602             "negl    $dst\n\t"
 6603             "addl    $dst, 31" %}
 6604   ins_encode %{
 6605     Register Rdst = $dst$$Register;
 6606     Register Rsrc = $src$$Register;
 6607     Label skip;
 6608     __ bsrl(Rdst, Rsrc);
 6609     __ jccb(Assembler::notZero, skip);
 6610     __ movl(Rdst, -1);
 6611     __ bind(skip);
 6612     __ negl(Rdst);
 6613     __ addl(Rdst, BitsPerInt - 1);
 6614   %}
 6615   ins_pipe(ialu_reg);
 6616 %}
 6617 
 6618 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 6619   predicate(UseCountLeadingZerosInstruction);
 6620   match(Set dst (CountLeadingZerosL src));
 6621   effect(KILL cr);
 6622 
 6623   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 6624   ins_encode %{
 6625     __ lzcntq($dst$$Register, $src$$Register);
 6626   %}
 6627   ins_pipe(ialu_reg);
 6628 %}
 6629 
 6630 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 6631   predicate(UseCountLeadingZerosInstruction);
 6632   match(Set dst (CountLeadingZerosL (LoadL src)));
 6633   effect(KILL cr);
 6634   ins_cost(175);
 6635   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 6636   ins_encode %{
 6637     __ lzcntq($dst$$Register, $src$$Address);
 6638   %}
 6639   ins_pipe(ialu_reg_mem);
 6640 %}
 6641 
 6642 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 6643   predicate(!UseCountLeadingZerosInstruction);
 6644   match(Set dst (CountLeadingZerosL src));
 6645   effect(KILL cr);
 6646 
 6647   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 6648             "jnz     skip\n\t"
 6649             "movl    $dst, -1\n"
 6650       "skip:\n\t"
 6651             "negl    $dst\n\t"
 6652             "addl    $dst, 63" %}
 6653   ins_encode %{
 6654     Register Rdst = $dst$$Register;
 6655     Register Rsrc = $src$$Register;
 6656     Label skip;
 6657     __ bsrq(Rdst, Rsrc);
 6658     __ jccb(Assembler::notZero, skip);
 6659     __ movl(Rdst, -1);
 6660     __ bind(skip);
 6661     __ negl(Rdst);
 6662     __ addl(Rdst, BitsPerLong - 1);
 6663   %}
 6664   ins_pipe(ialu_reg);
 6665 %}
 6666 
 6667 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 6668   predicate(UseCountTrailingZerosInstruction);
 6669   match(Set dst (CountTrailingZerosI src));
 6670   effect(KILL cr);
 6671 
 6672   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 6673   ins_encode %{
 6674     __ tzcntl($dst$$Register, $src$$Register);
 6675   %}
 6676   ins_pipe(ialu_reg);
 6677 %}
 6678 
 6679 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 6680   predicate(UseCountTrailingZerosInstruction);
 6681   match(Set dst (CountTrailingZerosI (LoadI src)));
 6682   effect(KILL cr);
 6683   ins_cost(175);
 6684   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 6685   ins_encode %{
 6686     __ tzcntl($dst$$Register, $src$$Address);
 6687   %}
 6688   ins_pipe(ialu_reg_mem);
 6689 %}
 6690 
 6691 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 6692   predicate(!UseCountTrailingZerosInstruction);
 6693   match(Set dst (CountTrailingZerosI src));
 6694   effect(KILL cr);
 6695 
 6696   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 6697             "jnz     done\n\t"
 6698             "movl    $dst, 32\n"
 6699       "done:" %}
 6700   ins_encode %{
 6701     Register Rdst = $dst$$Register;
 6702     Label done;
 6703     __ bsfl(Rdst, $src$$Register);
 6704     __ jccb(Assembler::notZero, done);
 6705     __ movl(Rdst, BitsPerInt);
 6706     __ bind(done);
 6707   %}
 6708   ins_pipe(ialu_reg);
 6709 %}
 6710 
 6711 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 6712   predicate(UseCountTrailingZerosInstruction);
 6713   match(Set dst (CountTrailingZerosL src));
 6714   effect(KILL cr);
 6715 
 6716   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 6717   ins_encode %{
 6718     __ tzcntq($dst$$Register, $src$$Register);
 6719   %}
 6720   ins_pipe(ialu_reg);
 6721 %}
 6722 
 6723 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 6724   predicate(UseCountTrailingZerosInstruction);
 6725   match(Set dst (CountTrailingZerosL (LoadL src)));
 6726   effect(KILL cr);
 6727   ins_cost(175);
 6728   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 6729   ins_encode %{
 6730     __ tzcntq($dst$$Register, $src$$Address);
 6731   %}
 6732   ins_pipe(ialu_reg_mem);
 6733 %}
 6734 
 6735 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 6736   predicate(!UseCountTrailingZerosInstruction);
 6737   match(Set dst (CountTrailingZerosL src));
 6738   effect(KILL cr);
 6739 
 6740   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 6741             "jnz     done\n\t"
 6742             "movl    $dst, 64\n"
 6743       "done:" %}
 6744   ins_encode %{
 6745     Register Rdst = $dst$$Register;
 6746     Label done;
 6747     __ bsfq(Rdst, $src$$Register);
 6748     __ jccb(Assembler::notZero, done);
 6749     __ movl(Rdst, BitsPerLong);
 6750     __ bind(done);
 6751   %}
 6752   ins_pipe(ialu_reg);
 6753 %}
 6754 
 6755 //--------------- Reverse Operation Instructions ----------------
 6756 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 6757   predicate(!VM_Version::supports_gfni());
 6758   match(Set dst (ReverseI src));
 6759   effect(TEMP dst, TEMP rtmp, KILL cr);
 6760   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 6761   ins_encode %{
 6762     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 6763   %}
 6764   ins_pipe( ialu_reg );
 6765 %}
 6766 
 6767 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 6768   predicate(VM_Version::supports_gfni());
 6769   match(Set dst (ReverseI src));
 6770   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6771   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6772   ins_encode %{
 6773     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 6774   %}
 6775   ins_pipe( ialu_reg );
 6776 %}
 6777 
 6778 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 6779   predicate(!VM_Version::supports_gfni());
 6780   match(Set dst (ReverseL src));
 6781   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 6782   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 6783   ins_encode %{
 6784     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 6785   %}
 6786   ins_pipe( ialu_reg );
 6787 %}
 6788 
 6789 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 6790   predicate(VM_Version::supports_gfni());
 6791   match(Set dst (ReverseL src));
 6792   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6793   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6794   ins_encode %{
 6795     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 6796   %}
 6797   ins_pipe( ialu_reg );
 6798 %}
 6799 
 6800 //---------- Population Count Instructions -------------------------------------
 6801 
 6802 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 6803   predicate(UsePopCountInstruction);
 6804   match(Set dst (PopCountI src));
 6805   effect(KILL cr);
 6806 
 6807   format %{ "popcnt  $dst, $src" %}
 6808   ins_encode %{
 6809     __ popcntl($dst$$Register, $src$$Register);
 6810   %}
 6811   ins_pipe(ialu_reg);
 6812 %}
 6813 
 6814 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 6815   predicate(UsePopCountInstruction);
 6816   match(Set dst (PopCountI (LoadI mem)));
 6817   effect(KILL cr);
 6818 
 6819   format %{ "popcnt  $dst, $mem" %}
 6820   ins_encode %{
 6821     __ popcntl($dst$$Register, $mem$$Address);
 6822   %}
 6823   ins_pipe(ialu_reg);
 6824 %}
 6825 
 6826 // Note: Long.bitCount(long) returns an int.
 6827 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 6828   predicate(UsePopCountInstruction);
 6829   match(Set dst (PopCountL src));
 6830   effect(KILL cr);
 6831 
 6832   format %{ "popcnt  $dst, $src" %}
 6833   ins_encode %{
 6834     __ popcntq($dst$$Register, $src$$Register);
 6835   %}
 6836   ins_pipe(ialu_reg);
 6837 %}
 6838 
 6839 // Note: Long.bitCount(long) returns an int.
 6840 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 6841   predicate(UsePopCountInstruction);
 6842   match(Set dst (PopCountL (LoadL mem)));
 6843   effect(KILL cr);
 6844 
 6845   format %{ "popcnt  $dst, $mem" %}
 6846   ins_encode %{
 6847     __ popcntq($dst$$Register, $mem$$Address);
 6848   %}
 6849   ins_pipe(ialu_reg);
 6850 %}
 6851 
 6852 
 6853 //----------MemBar Instructions-----------------------------------------------
 6854 // Memory barrier flavors
 6855 
// Matches both MemBarAcquire and LoadFence and emits no code: the encoding
// is empty, the size is declared as 0 and the cost is 0.
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6867 
// Matches MemBarAcquireLock and emits no code (empty encoding, size 0,
// cost 0).  Per the format text, the CMPXCHG issued earlier in FastLock is
// what makes a separate barrier unnecessary.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6878 
// Matches both MemBarRelease and StoreFence and emits no code: the encoding
// is empty, the size is declared as 0 and the cost is 0.
instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6890 
// Matches MemBarReleaseLock and emits no code (empty encoding, size 0,
// cost 0).  Per the format text, the FastUnlock that follows is what makes a
// separate barrier unnecessary.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6901 
// MemBarVolatile that actually emits code: the encoding asks the macro
// assembler for a StoreLoad barrier.  The format text prints it as a locked
// add of 0 to [rsp]; NOTE(review): presumably that is the sequence membar()
// emits - confirm in the assembler.  Flags are declared killed.  The cost of
// 400 lets the zero-cost unnecessary_membar_volatile rule win whenever its
// predicate holds.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
 6916 
// MemBarVolatile that emits no code.  Selectable only when
// Matcher::post_store_load_barrier(n) returns true for the node; with cost 0
// it is then preferred over membar_volatile (cost 400).
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6928 
// Matches both MemBarStoreStore and StoreStoreFence and emits no code: the
// encoding is empty, the size is declared as 0 and the cost is 0.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 6939 
 6940 //----------Move Instructions--------------------------------------------------
 6941 
 6942 instruct castX2P(rRegP dst, rRegL src)
 6943 %{
 6944   match(Set dst (CastX2P src));
 6945 
 6946   format %{ "movq    $dst, $src\t# long->ptr" %}
 6947   ins_encode %{
 6948     if ($dst$$reg != $src$$reg) {
 6949       __ movptr($dst$$Register, $src$$Register);
 6950     }
 6951   %}
 6952   ins_pipe(ialu_reg_reg); // XXX
 6953 %}
 6954 
 6955 instruct castP2X(rRegL dst, rRegP src)
 6956 %{
 6957   match(Set dst (CastP2X src));
 6958 
 6959   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6960   ins_encode %{
 6961     if ($dst$$reg != $src$$reg) {
 6962       __ movptr($dst$$Register, $src$$Register);
 6963     }
 6964   %}
 6965   ins_pipe(ialu_reg_reg); // XXX
 6966 %}
 6967 
 6968 // Convert oop into int for vectors alignment masking
 6969 instruct convP2I(rRegI dst, rRegP src)
 6970 %{
 6971   match(Set dst (ConvL2I (CastP2X src)));
 6972 
 6973   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6974   ins_encode %{
 6975     __ movl($dst$$Register, $src$$Register);
 6976   %}
 6977   ins_pipe(ialu_reg_reg); // XXX
 6978 %}
 6979 
 6980 // Convert compressed oop into int for vectors alignment masking
 6981 // in case of 32bit oops (heap < 4Gb).
 6982 instruct convN2I(rRegI dst, rRegN src)
 6983 %{
 6984   predicate(CompressedOops::shift() == 0);
 6985   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6986 
 6987   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 6988   ins_encode %{
 6989     __ movl($dst$$Register, $src$$Register);
 6990   %}
 6991   ins_pipe(ialu_reg_reg); // XXX
 6992 %}
 6993 
 6994 // Convert oop pointer into compressed form
 6995 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 6996   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 6997   match(Set dst (EncodeP src));
 6998   effect(KILL cr);
 6999   format %{ "encode_heap_oop $dst,$src" %}
 7000   ins_encode %{
 7001     Register s = $src$$Register;
 7002     Register d = $dst$$Register;
 7003     if (s != d) {
 7004       __ movq(d, s);
 7005     }
 7006     __ encode_heap_oop(d);
 7007   %}
 7008   ins_pipe(ialu_reg_long);
 7009 %}
 7010 
 7011 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 7012   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 7013   match(Set dst (EncodeP src));
 7014   effect(KILL cr);
 7015   format %{ "encode_heap_oop_not_null $dst,$src" %}
 7016   ins_encode %{
 7017     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 7018   %}
 7019   ins_pipe(ialu_reg_long);
 7020 %}
 7021 
 7022 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 7023   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 7024             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 7025   match(Set dst (DecodeN src));
 7026   effect(KILL cr);
 7027   format %{ "decode_heap_oop $dst,$src" %}
 7028   ins_encode %{
 7029     Register s = $src$$Register;
 7030     Register d = $dst$$Register;
 7031     if (s != d) {
 7032       __ movq(d, s);
 7033     }
 7034     __ decode_heap_oop(d);
 7035   %}
 7036   ins_pipe(ialu_reg_long);
 7037 %}
 7038 
 7039 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 7040   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 7041             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 7042   match(Set dst (DecodeN src));
 7043   effect(KILL cr);
 7044   format %{ "decode_heap_oop_not_null $dst,$src" %}
 7045   ins_encode %{
 7046     Register s = $src$$Register;
 7047     Register d = $dst$$Register;
 7048     if (s != d) {
 7049       __ decode_heap_oop_not_null(d, s);
 7050     } else {
 7051       __ decode_heap_oop_not_null(d);
 7052     }
 7053   %}
 7054   ins_pipe(ialu_reg_long);
 7055 %}
 7056 
 7057 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 7058   match(Set dst (EncodePKlass src));
 7059   effect(TEMP dst, KILL cr);
 7060   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 7061   ins_encode %{
 7062     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7063   %}
 7064   ins_pipe(ialu_reg_long);
 7065 %}
 7066 
 7067 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 7068   match(Set dst (DecodeNKlass src));
 7069   effect(TEMP dst, KILL cr);
 7070   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 7071   ins_encode %{
 7072     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7073   %}
 7074   ins_pipe(ialu_reg_long);
 7075 %}
 7076 
 7077 //----------Conditional Move---------------------------------------------------
 7078 // Jump
 7079 // dummy instruction for generating temp registers
 7080 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 7081   match(Jump (LShiftL switch_val shift));
 7082   ins_cost(350);
 7083   predicate(false);
 7084   effect(TEMP dest);
 7085 
 7086   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7087             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 7088   ins_encode %{
 7089     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7090     // to do that and the compiler is using that register as one it can allocate.
 7091     // So we build it all by hand.
 7092     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 7093     // ArrayAddress dispatch(table, index);
 7094     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 7095     __ lea($dest$$Register, $constantaddress);
 7096     __ jmp(dispatch);
 7097   %}
 7098   ins_pipe(pipe_jmp);
 7099 %}
 7100 
 7101 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 7102   match(Jump (AddL (LShiftL switch_val shift) offset));
 7103   ins_cost(350);
 7104   effect(TEMP dest);
 7105 
 7106   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7107             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 7108   ins_encode %{
 7109     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7110     // to do that and the compiler is using that register as one it can allocate.
 7111     // So we build it all by hand.
 7112     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7113     // ArrayAddress dispatch(table, index);
 7114     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7115     __ lea($dest$$Register, $constantaddress);
 7116     __ jmp(dispatch);
 7117   %}
 7118   ins_pipe(pipe_jmp);
 7119 %}
 7120 
 7121 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 7122   match(Jump switch_val);
 7123   ins_cost(350);
 7124   effect(TEMP dest);
 7125 
 7126   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7127             "jmp     [$dest + $switch_val]\n\t" %}
 7128   ins_encode %{
 7129     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7130     // to do that and the compiler is using that register as one it can allocate.
 7131     // So we build it all by hand.
 7132     // Address index(noreg, switch_reg, Address::times_1);
 7133     // ArrayAddress dispatch(table, index);
 7134     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 7135     __ lea($dest$$Register, $constantaddress);
 7136     __ jmp(dispatch);
 7137   %}
 7138   ins_pipe(pipe_jmp);
 7139 %}
 7140 
 7141 // Conditional move
 7142 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7143 %{
 7144   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7145   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7146 
 7147   ins_cost(100); // XXX
 7148   format %{ "setbn$cop $dst\t# signed, int" %}
 7149   ins_encode %{
 7150     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7151     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7152   %}
 7153   ins_pipe(ialu_reg);
 7154 %}
 7155 
 7156 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 7157 %{
 7158   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7159 
 7160   ins_cost(200); // XXX
 7161   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7162   ins_encode %{
 7163     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7164   %}
 7165   ins_pipe(pipe_cmov_reg);
 7166 %}
 7167 
 7168 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7169 %{
 7170   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7171   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7172 
 7173   ins_cost(100); // XXX
 7174   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7175   ins_encode %{
 7176     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7177     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7178   %}
 7179   ins_pipe(ialu_reg);
 7180 %}
 7181 
 7182 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 7183   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7184 
 7185   ins_cost(200); // XXX
 7186   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7187   ins_encode %{
 7188     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7189   %}
 7190   ins_pipe(pipe_cmov_reg);
 7191 %}
 7192 
 7193 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7194 %{
 7195   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7196   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7197 
 7198   ins_cost(100); // XXX
 7199   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7200   ins_encode %{
 7201     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7202     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7203   %}
 7204   ins_pipe(ialu_reg);
 7205 %}
 7206 
// CMoveI of an int register on rFlagsRegUCF flags with a cmpOpUCF
// condition.  Has no encoding of its own: it expands to cmovI_regU with the
// same operands.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
 7214 
 7215 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 7216   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7217   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7218 
 7219   ins_cost(200); // XXX
 7220   format %{ "cmovpl  $dst, $src\n\t"
 7221             "cmovnel $dst, $src" %}
 7222   ins_encode %{
 7223     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7224     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7225   %}
 7226   ins_pipe(pipe_cmov_reg);
 7227 %}
 7228 
 7229 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7230 // inputs of the CMove
 7231 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 7232   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7233   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7234 
 7235   ins_cost(200); // XXX
 7236   format %{ "cmovpl  $dst, $src\n\t"
 7237             "cmovnel $dst, $src" %}
 7238   ins_encode %{
 7239     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7240     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7241   %}
 7242   ins_pipe(pipe_cmov_reg);
 7243 %}
 7244 
 7245 // Conditional move
 7246 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 7247   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7248 
 7249   ins_cost(250); // XXX
 7250   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7251   ins_encode %{
 7252     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7253   %}
 7254   ins_pipe(pipe_cmov_mem);
 7255 %}
 7256 
 7257 // Conditional move
 7258 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 7259 %{
 7260   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7261 
 7262   ins_cost(250); // XXX
 7263   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7264   ins_encode %{
 7265     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7266   %}
 7267   ins_pipe(pipe_cmov_mem);
 7268 %}
 7269 
// CMoveI of an int loaded from memory on rFlagsRegUCF flags with a cmpOpUCF
// condition.  Has no encoding of its own: it expands to cmovI_memU with the
// same operands.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
 7277 
 7278 // Conditional move
 7279 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 7280 %{
 7281   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7282 
 7283   ins_cost(200); // XXX
 7284   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 7285   ins_encode %{
 7286     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7287   %}
 7288   ins_pipe(pipe_cmov_reg);
 7289 %}
 7290 
 7291 // Conditional move
 7292 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 7293 %{
 7294   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7295 
 7296   ins_cost(200); // XXX
 7297   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 7298   ins_encode %{
 7299     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7300   %}
 7301   ins_pipe(pipe_cmov_reg);
 7302 %}
 7303 
// CMoveN of a compressed pointer register on rFlagsRegUCF flags with a
// cmpOpUCF condition.  Has no encoding of its own: it expands to cmovN_regU
// with the same operands.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
 7311 
 7312 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 7313   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7314   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7315 
 7316   ins_cost(200); // XXX
 7317   format %{ "cmovpl  $dst, $src\n\t"
 7318             "cmovnel $dst, $src" %}
 7319   ins_encode %{
 7320     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7321     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7322   %}
 7323   ins_pipe(pipe_cmov_reg);
 7324 %}
 7325 
 7326 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7327 // inputs of the CMove
 7328 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 7329   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7330   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 7331 
 7332   ins_cost(200); // XXX
 7333   format %{ "cmovpl  $dst, $src\n\t"
 7334             "cmovnel $dst, $src" %}
 7335   ins_encode %{
 7336     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7337     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7338   %}
 7339   ins_pipe(pipe_cmov_reg);
 7340 %}
 7341 
 7342 // Conditional move
 7343 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 7344 %{
 7345   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7346 
 7347   ins_cost(200); // XXX
 7348   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 7349   ins_encode %{
 7350     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7351   %}
 7352   ins_pipe(pipe_cmov_reg);  // XXX
 7353 %}
 7354 
 7355 // Conditional move
 7356 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 7357 %{
 7358   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7359 
 7360   ins_cost(200); // XXX
 7361   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 7362   ins_encode %{
 7363     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7364   %}
 7365   ins_pipe(pipe_cmov_reg); // XXX
 7366 %}
 7367 
// CMoveP of a pointer register on rFlagsRegUCF flags with a cmpOpUCF
// condition.  Has no encoding of its own: it expands to cmovP_regU with the
// same operands.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
 7375 
 7376 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 7377   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7378   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7379 
 7380   ins_cost(200); // XXX
 7381   format %{ "cmovpq  $dst, $src\n\t"
 7382             "cmovneq $dst, $src" %}
 7383   ins_encode %{
 7384     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7385     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7386   %}
 7387   ins_pipe(pipe_cmov_reg);
 7388 %}
 7389 
 7390 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7391 // inputs of the CMove
 7392 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 7393   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7394   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 7395 
 7396   ins_cost(200); // XXX
 7397   format %{ "cmovpq  $dst, $src\n\t"
 7398             "cmovneq $dst, $src" %}
 7399   ins_encode %{
 7400     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7401     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7402   %}
 7403   ins_pipe(pipe_cmov_reg);
 7404 %}
 7405 
 7406 // DISABLED: Requires the ADLC to emit a bottom_type call that
 7407 // correctly meets the two pointer arguments; one is an incoming
 7408 // register but the other is a memory operand.  ALSO appears to
 7409 // be buggy with implicit null checks.
 7410 //
 7411 //// Conditional move
 7412 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
 7413 //%{
 7414 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7415 //  ins_cost(250);
 7416 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7417 //  opcode(0x0F,0x40);
 7418 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7419 //  ins_pipe( pipe_cmov_mem );
 7420 //%}
 7421 //
 7422 //// Conditional move
 7423 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
 7424 //%{
 7425 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7426 //  ins_cost(250);
 7427 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7428 //  opcode(0x0F,0x40);
 7429 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7430 //  ins_pipe( pipe_cmov_mem );
 7431 //%}
 7432 
 7433 instruct cmovL_imm_01(rRegL dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7434 %{
 7435   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7436   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7437 
 7438   ins_cost(100); // XXX
 7439   format %{ "setbn$cop $dst\t# signed, long" %}
 7440   ins_encode %{
 7441     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7442     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7443   %}
 7444   ins_pipe(ialu_reg);
 7445 %}
 7446 
 7447 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 7448 %{
 7449   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7450 
 7451   ins_cost(200); // XXX
 7452   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7453   ins_encode %{
 7454     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7455   %}
 7456   ins_pipe(pipe_cmov_reg);  // XXX
 7457 %}
 7458 
 7459 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 7460 %{
 7461   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7462 
 7463   ins_cost(200); // XXX
 7464   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7465   ins_encode %{
 7466     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7467   %}
 7468   ins_pipe(pipe_cmov_mem);  // XXX
 7469 %}
 7470 
 7471 instruct cmovL_imm_01U(rRegL dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7472 %{
 7473   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7474   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7475 
 7476   ins_cost(100); // XXX
 7477   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7478   ins_encode %{
 7479     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7480     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7481   %}
 7482   ins_pipe(ialu_reg);
 7483 %}
 7484 
 7485 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 7486 %{
 7487   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7488 
 7489   ins_cost(200); // XXX
 7490   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7491   ins_encode %{
 7492     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7493   %}
 7494   ins_pipe(pipe_cmov_reg); // XXX
 7495 %}
 7496 
 7497 instruct cmovL_imm_01UCF(rRegL dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7498 %{
 7499   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7500   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7501 
 7502   ins_cost(100); // XXX
 7503   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7504   ins_encode %{
 7505     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7506     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7507   %}
 7508   ins_pipe(ialu_reg);
 7509 %}
 7510 
// CMoveL of a long register on rFlagsRegUCF flags with a cmpOpUCF
// condition.  Has no encoding of its own: it expands to cmovL_regU with the
// same operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
 7518 
 7519 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 7520   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7521   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7522 
 7523   ins_cost(200); // XXX
 7524   format %{ "cmovpq  $dst, $src\n\t"
 7525             "cmovneq $dst, $src" %}
 7526   ins_encode %{
 7527     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7528     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7529   %}
 7530   ins_pipe(pipe_cmov_reg);
 7531 %}
 7532 
 7533 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7534 // inputs of the CMove
 7535 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 7536   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7537   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7538 
 7539   ins_cost(200); // XXX
 7540   format %{ "cmovpq  $dst, $src\n\t"
 7541             "cmovneq $dst, $src" %}
 7542   ins_encode %{
 7543     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7544     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7545   %}
 7546   ins_pipe(pipe_cmov_reg);
 7547 %}
 7548 
 7549 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 7550 %{
 7551   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7552 
 7553   ins_cost(200); // XXX
 7554   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7555   ins_encode %{
 7556     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7557   %}
 7558   ins_pipe(pipe_cmov_mem); // XXX
 7559 %}
 7560 
// CMoveL of a long loaded from memory on rFlagsRegUCF flags with a cmpOpUCF
// condition.  Has no encoding of its own: it expands to cmovL_memU with the
// same operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
 7568 
 7569 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 7570 %{
 7571   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7572 
 7573   ins_cost(200); // XXX
 7574   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7575             "movss     $dst, $src\n"
 7576     "skip:" %}
 7577   ins_encode %{
 7578     Label Lskip;
 7579     // Invert sense of branch from sense of CMOV
 7580     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7581     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7582     __ bind(Lskip);
 7583   %}
 7584   ins_pipe(pipe_slow);
 7585 %}
 7586 
 7587 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
 7588 // %{
//   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadF src))));
 7590 
 7591 //   ins_cost(200); // XXX
 7592 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7593 //             "movss     $dst, $src\n"
 7594 //     "skip:" %}
 7595 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
 7596 //   ins_pipe(pipe_slow);
 7597 // %}
 7598 
 7599 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 7600 %{
 7601   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7602 
 7603   ins_cost(200); // XXX
 7604   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 7605             "movss     $dst, $src\n"
 7606     "skip:" %}
 7607   ins_encode %{
 7608     Label Lskip;
 7609     // Invert sense of branch from sense of CMOV
 7610     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7611     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7612     __ bind(Lskip);
 7613   %}
 7614   ins_pipe(pipe_slow);
 7615 %}
 7616 
 7617 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 7618   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7619   ins_cost(200);
 7620   expand %{
 7621     cmovF_regU(cop, cr, dst, src);
 7622   %}
 7623 %}
 7624 
 7625 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 7626 %{
 7627   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7628 
 7629   ins_cost(200); // XXX
 7630   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 7631             "movsd     $dst, $src\n"
 7632     "skip:" %}
 7633   ins_encode %{
 7634     Label Lskip;
 7635     // Invert sense of branch from sense of CMOV
 7636     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7637     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7638     __ bind(Lskip);
 7639   %}
 7640   ins_pipe(pipe_slow);
 7641 %}
 7642 
 7643 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 7644 %{
 7645   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7646 
 7647   ins_cost(200); // XXX
 7648   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 7649             "movsd     $dst, $src\n"
 7650     "skip:" %}
 7651   ins_encode %{
 7652     Label Lskip;
 7653     // Invert sense of branch from sense of CMOV
 7654     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7655     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7656     __ bind(Lskip);
 7657   %}
 7658   ins_pipe(pipe_slow);
 7659 %}
 7660 
 7661 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 7662   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7663   ins_cost(200);
 7664   expand %{
 7665     cmovD_regU(cop, cr, dst, src);
 7666   %}
 7667 %}
 7668 
 7669 //----------Arithmetic Instructions--------------------------------------------
 7670 //----------Addition Instructions----------------------------------------------
 7671 
 7672 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 7673 %{
 7674   match(Set dst (AddI dst src));
 7675   effect(KILL cr);
 7676 
 7677   format %{ "addl    $dst, $src\t# int" %}
 7678   ins_encode %{
 7679     __ addl($dst$$Register, $src$$Register);
 7680   %}
 7681   ins_pipe(ialu_reg_reg);
 7682 %}
 7683 
 7684 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 7685 %{
 7686   match(Set dst (AddI dst src));
 7687   effect(KILL cr);
 7688 
 7689   format %{ "addl    $dst, $src\t# int" %}
 7690   ins_encode %{
 7691     __ addl($dst$$Register, $src$$constant);
 7692   %}
 7693   ins_pipe( ialu_reg );
 7694 %}
 7695 
 7696 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 7697 %{
 7698   match(Set dst (AddI dst (LoadI src)));
 7699   effect(KILL cr);
 7700 
 7701   ins_cost(150); // XXX
 7702   format %{ "addl    $dst, $src\t# int" %}
 7703   ins_encode %{
 7704     __ addl($dst$$Register, $src$$Address);
 7705   %}
 7706   ins_pipe(ialu_reg_mem);
 7707 %}
 7708 
 7709 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 7710 %{
 7711   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7712   effect(KILL cr);
 7713 
 7714   ins_cost(150); // XXX
 7715   format %{ "addl    $dst, $src\t# int" %}
 7716   ins_encode %{
 7717     __ addl($dst$$Address, $src$$Register);
 7718   %}
 7719   ins_pipe(ialu_mem_reg);
 7720 %}
 7721 
 7722 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 7723 %{
 7724   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7725   effect(KILL cr);
 7726 
 7727   ins_cost(125); // XXX
 7728   format %{ "addl    $dst, $src\t# int" %}
 7729   ins_encode %{
 7730     __ addl($dst$$Address, $src$$constant);
 7731   %}
 7732   ins_pipe(ialu_mem_imm);
 7733 %}
 7734 
 7735 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 7736 %{
 7737   predicate(UseIncDec);
 7738   match(Set dst (AddI dst src));
 7739   effect(KILL cr);
 7740 
 7741   format %{ "incl    $dst\t# int" %}
 7742   ins_encode %{
 7743     __ incrementl($dst$$Register);
 7744   %}
 7745   ins_pipe(ialu_reg);
 7746 %}
 7747 
 7748 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 7749 %{
 7750   predicate(UseIncDec);
 7751   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7752   effect(KILL cr);
 7753 
 7754   ins_cost(125); // XXX
 7755   format %{ "incl    $dst\t# int" %}
 7756   ins_encode %{
 7757     __ incrementl($dst$$Address);
 7758   %}
 7759   ins_pipe(ialu_mem_imm);
 7760 %}
 7761 
 7762 // XXX why does that use AddI
 7763 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
 7764 %{
 7765   predicate(UseIncDec);
 7766   match(Set dst (AddI dst src));
 7767   effect(KILL cr);
 7768 
 7769   format %{ "decl    $dst\t# int" %}
 7770   ins_encode %{
 7771     __ decrementl($dst$$Register);
 7772   %}
 7773   ins_pipe(ialu_reg);
 7774 %}
 7775 
 7776 // XXX why does that use AddI
 7777 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
 7778 %{
 7779   predicate(UseIncDec);
 7780   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7781   effect(KILL cr);
 7782 
 7783   ins_cost(125); // XXX
 7784   format %{ "decl    $dst\t# int" %}
 7785   ins_encode %{
 7786     __ decrementl($dst$$Address);
 7787   %}
 7788   ins_pipe(ialu_mem_imm);
 7789 %}
 7790 
 7791 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
 7792 %{
 7793   predicate(VM_Version::supports_fast_2op_lea());
 7794   match(Set dst (AddI (LShiftI index scale) disp));
 7795 
 7796   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
 7797   ins_encode %{
 7798     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7799     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7800   %}
 7801   ins_pipe(ialu_reg_reg);
 7802 %}
 7803 
 7804 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
 7805 %{
 7806   predicate(VM_Version::supports_fast_3op_lea());
 7807   match(Set dst (AddI (AddI base index) disp));
 7808 
 7809   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
 7810   ins_encode %{
 7811     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7812   %}
 7813   ins_pipe(ialu_reg_reg);
 7814 %}
 7815 
 7816 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
 7817 %{
 7818   predicate(VM_Version::supports_fast_2op_lea());
 7819   match(Set dst (AddI base (LShiftI index scale)));
 7820 
 7821   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
 7822   ins_encode %{
 7823     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7824     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
 7825   %}
 7826   ins_pipe(ialu_reg_reg);
 7827 %}
 7828 
 7829 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
 7830 %{
 7831   predicate(VM_Version::supports_fast_3op_lea());
 7832   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
 7833 
 7834   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
 7835   ins_encode %{
 7836     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7837     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 7838   %}
 7839   ins_pipe(ialu_reg_reg);
 7840 %}
 7841 
 7842 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 7843 %{
 7844   match(Set dst (AddL dst src));
 7845   effect(KILL cr);
 7846 
 7847   format %{ "addq    $dst, $src\t# long" %}
 7848   ins_encode %{
 7849     __ addq($dst$$Register, $src$$Register);
 7850   %}
 7851   ins_pipe(ialu_reg_reg);
 7852 %}
 7853 
 7854 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 7855 %{
 7856   match(Set dst (AddL dst src));
 7857   effect(KILL cr);
 7858 
 7859   format %{ "addq    $dst, $src\t# long" %}
 7860   ins_encode %{
 7861     __ addq($dst$$Register, $src$$constant);
 7862   %}
 7863   ins_pipe( ialu_reg );
 7864 %}
 7865 
 7866 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 7867 %{
 7868   match(Set dst (AddL dst (LoadL src)));
 7869   effect(KILL cr);
 7870 
 7871   ins_cost(150); // XXX
 7872   format %{ "addq    $dst, $src\t# long" %}
 7873   ins_encode %{
 7874     __ addq($dst$$Register, $src$$Address);
 7875   %}
 7876   ins_pipe(ialu_reg_mem);
 7877 %}
 7878 
 7879 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 7880 %{
 7881   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7882   effect(KILL cr);
 7883 
 7884   ins_cost(150); // XXX
 7885   format %{ "addq    $dst, $src\t# long" %}
 7886   ins_encode %{
 7887     __ addq($dst$$Address, $src$$Register);
 7888   %}
 7889   ins_pipe(ialu_mem_reg);
 7890 %}
 7891 
 7892 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
 7893 %{
 7894   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7895   effect(KILL cr);
 7896 
 7897   ins_cost(125); // XXX
 7898   format %{ "addq    $dst, $src\t# long" %}
 7899   ins_encode %{
 7900     __ addq($dst$$Address, $src$$constant);
 7901   %}
 7902   ins_pipe(ialu_mem_imm);
 7903 %}
 7904 
 7905 instruct incL_rReg(rRegI dst, immL1 src, rFlagsReg cr)
 7906 %{
 7907   predicate(UseIncDec);
 7908   match(Set dst (AddL dst src));
 7909   effect(KILL cr);
 7910 
 7911   format %{ "incq    $dst\t# long" %}
 7912   ins_encode %{
 7913     __ incrementq($dst$$Register);
 7914   %}
 7915   ins_pipe(ialu_reg);
 7916 %}
 7917 
 7918 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
 7919 %{
 7920   predicate(UseIncDec);
 7921   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7922   effect(KILL cr);
 7923 
 7924   ins_cost(125); // XXX
 7925   format %{ "incq    $dst\t# long" %}
 7926   ins_encode %{
 7927     __ incrementq($dst$$Address);
 7928   %}
 7929   ins_pipe(ialu_mem_imm);
 7930 %}
 7931 
 7932 // XXX why does that use AddL
 7933 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
 7934 %{
 7935   predicate(UseIncDec);
 7936   match(Set dst (AddL dst src));
 7937   effect(KILL cr);
 7938 
 7939   format %{ "decq    $dst\t# long" %}
 7940   ins_encode %{
 7941     __ decrementq($dst$$Register);
 7942   %}
 7943   ins_pipe(ialu_reg);
 7944 %}
 7945 
 7946 // XXX why does that use AddL
 7947 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
 7948 %{
 7949   predicate(UseIncDec);
 7950   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7951   effect(KILL cr);
 7952 
 7953   ins_cost(125); // XXX
 7954   format %{ "decq    $dst\t# long" %}
 7955   ins_encode %{
 7956     __ decrementq($dst$$Address);
 7957   %}
 7958   ins_pipe(ialu_mem_imm);
 7959 %}
 7960 
 7961 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
 7962 %{
 7963   predicate(VM_Version::supports_fast_2op_lea());
 7964   match(Set dst (AddL (LShiftL index scale) disp));
 7965 
 7966   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
 7967   ins_encode %{
 7968     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7969     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7970   %}
 7971   ins_pipe(ialu_reg_reg);
 7972 %}
 7973 
 7974 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
 7975 %{
 7976   predicate(VM_Version::supports_fast_3op_lea());
 7977   match(Set dst (AddL (AddL base index) disp));
 7978 
 7979   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
 7980   ins_encode %{
 7981     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7982   %}
 7983   ins_pipe(ialu_reg_reg);
 7984 %}
 7985 
 7986 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
 7987 %{
 7988   predicate(VM_Version::supports_fast_2op_lea());
 7989   match(Set dst (AddL base (LShiftL index scale)));
 7990 
 7991   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
 7992   ins_encode %{
 7993     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7994     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
 7995   %}
 7996   ins_pipe(ialu_reg_reg);
 7997 %}
 7998 
 7999 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
 8000 %{
 8001   predicate(VM_Version::supports_fast_3op_lea());
 8002   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
 8003 
 8004   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
 8005   ins_encode %{
 8006     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 8007     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 8008   %}
 8009   ins_pipe(ialu_reg_reg);
 8010 %}
 8011 
 8012 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
 8013 %{
 8014   match(Set dst (AddP dst src));
 8015   effect(KILL cr);
 8016 
 8017   format %{ "addq    $dst, $src\t# ptr" %}
 8018   ins_encode %{
 8019     __ addq($dst$$Register, $src$$Register);
 8020   %}
 8021   ins_pipe(ialu_reg_reg);
 8022 %}
 8023 
 8024 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
 8025 %{
 8026   match(Set dst (AddP dst src));
 8027   effect(KILL cr);
 8028 
 8029   format %{ "addq    $dst, $src\t# ptr" %}
 8030   ins_encode %{
 8031     __ addq($dst$$Register, $src$$constant);
 8032   %}
 8033   ins_pipe( ialu_reg );
 8034 %}
 8035 
 8036 // XXX addP mem ops ????
 8037 
 8038 instruct checkCastPP(rRegP dst)
 8039 %{
 8040   match(Set dst (CheckCastPP dst));
 8041 
 8042   size(0);
 8043   format %{ "# checkcastPP of $dst" %}
 8044   ins_encode(/* empty encoding */);
 8045   ins_pipe(empty);
 8046 %}
 8047 
 8048 instruct castPP(rRegP dst)
 8049 %{
 8050   match(Set dst (CastPP dst));
 8051 
 8052   size(0);
 8053   format %{ "# castPP of $dst" %}
 8054   ins_encode(/* empty encoding */);
 8055   ins_pipe(empty);
 8056 %}
 8057 
 8058 instruct castII(rRegI dst)
 8059 %{
 8060   match(Set dst (CastII dst));
 8061 
 8062   size(0);
 8063   format %{ "# castII of $dst" %}
 8064   ins_encode(/* empty encoding */);
 8065   ins_cost(0);
 8066   ins_pipe(empty);
 8067 %}
 8068 
 8069 instruct castLL(rRegL dst)
 8070 %{
 8071   match(Set dst (CastLL dst));
 8072 
 8073   size(0);
 8074   format %{ "# castLL of $dst" %}
 8075   ins_encode(/* empty encoding */);
 8076   ins_cost(0);
 8077   ins_pipe(empty);
 8078 %}
 8079 
 8080 instruct castFF(regF dst)
 8081 %{
 8082   match(Set dst (CastFF dst));
 8083 
 8084   size(0);
 8085   format %{ "# castFF of $dst" %}
 8086   ins_encode(/* empty encoding */);
 8087   ins_cost(0);
 8088   ins_pipe(empty);
 8089 %}
 8090 
 8091 instruct castDD(regD dst)
 8092 %{
 8093   match(Set dst (CastDD dst));
 8094 
 8095   size(0);
 8096   format %{ "# castDD of $dst" %}
 8097   ins_encode(/* empty encoding */);
 8098   ins_cost(0);
 8099   ins_pipe(empty);
 8100 %}
 8101 
 8102 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 8103 instruct compareAndSwapP(rRegI res,
 8104                          memory mem_ptr,
 8105                          rax_RegP oldval, rRegP newval,
 8106                          rFlagsReg cr)
 8107 %{
 8108   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8109   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 8110   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 8111   effect(KILL cr, KILL oldval);
 8112 
 8113   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8114             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8115             "sete    $res\n\t"
 8116             "movzbl  $res, $res" %}
 8117   ins_encode %{
 8118     __ lock();
 8119     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8120     __ setb(Assembler::equal, $res$$Register);
 8121     __ movzbl($res$$Register, $res$$Register);
 8122   %}
 8123   ins_pipe( pipe_cmpxchg );
 8124 %}
 8125 
 8126 instruct compareAndSwapL(rRegI res,
 8127                          memory mem_ptr,
 8128                          rax_RegL oldval, rRegL newval,
 8129                          rFlagsReg cr)
 8130 %{
 8131   predicate(VM_Version::supports_cx8());
 8132   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 8133   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 8134   effect(KILL cr, KILL oldval);
 8135 
 8136   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8137             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8138             "sete    $res\n\t"
 8139             "movzbl  $res, $res" %}
 8140   ins_encode %{
 8141     __ lock();
 8142     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8143     __ setb(Assembler::equal, $res$$Register);
 8144     __ movzbl($res$$Register, $res$$Register);
 8145   %}
 8146   ins_pipe( pipe_cmpxchg );
 8147 %}
 8148 
 8149 instruct compareAndSwapI(rRegI res,
 8150                          memory mem_ptr,
 8151                          rax_RegI oldval, rRegI newval,
 8152                          rFlagsReg cr)
 8153 %{
 8154   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 8155   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 8156   effect(KILL cr, KILL oldval);
 8157 
 8158   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8159             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8160             "sete    $res\n\t"
 8161             "movzbl  $res, $res" %}
 8162   ins_encode %{
 8163     __ lock();
 8164     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8165     __ setb(Assembler::equal, $res$$Register);
 8166     __ movzbl($res$$Register, $res$$Register);
 8167   %}
 8168   ins_pipe( pipe_cmpxchg );
 8169 %}
 8170 
 8171 instruct compareAndSwapB(rRegI res,
 8172                          memory mem_ptr,
 8173                          rax_RegI oldval, rRegI newval,
 8174                          rFlagsReg cr)
 8175 %{
 8176   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 8177   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 8178   effect(KILL cr, KILL oldval);
 8179 
 8180   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8181             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8182             "sete    $res\n\t"
 8183             "movzbl  $res, $res" %}
 8184   ins_encode %{
 8185     __ lock();
 8186     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 8187     __ setb(Assembler::equal, $res$$Register);
 8188     __ movzbl($res$$Register, $res$$Register);
 8189   %}
 8190   ins_pipe( pipe_cmpxchg );
 8191 %}
 8192 
 8193 instruct compareAndSwapS(rRegI res,
 8194                          memory mem_ptr,
 8195                          rax_RegI oldval, rRegI newval,
 8196                          rFlagsReg cr)
 8197 %{
 8198   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 8199   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 8200   effect(KILL cr, KILL oldval);
 8201 
 8202   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8203             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8204             "sete    $res\n\t"
 8205             "movzbl  $res, $res" %}
 8206   ins_encode %{
 8207     __ lock();
 8208     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 8209     __ setb(Assembler::equal, $res$$Register);
 8210     __ movzbl($res$$Register, $res$$Register);
 8211   %}
 8212   ins_pipe( pipe_cmpxchg );
 8213 %}
 8214 
 8215 instruct compareAndSwapN(rRegI res,
 8216                           memory mem_ptr,
 8217                           rax_RegN oldval, rRegN newval,
 8218                           rFlagsReg cr) %{
 8219   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
 8220   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
 8221   effect(KILL cr, KILL oldval);
 8222 
 8223   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8224             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8225             "sete    $res\n\t"
 8226             "movzbl  $res, $res" %}
 8227   ins_encode %{
 8228     __ lock();
 8229     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8230     __ setb(Assembler::equal, $res$$Register);
 8231     __ movzbl($res$$Register, $res$$Register);
 8232   %}
 8233   ins_pipe( pipe_cmpxchg );
 8234 %}
 8235 
 8236 instruct compareAndExchangeB(
 8237                          memory mem_ptr,
 8238                          rax_RegI oldval, rRegI newval,
 8239                          rFlagsReg cr)
 8240 %{
 8241   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 8242   effect(KILL cr);
 8243 
 8244   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8245             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8246   ins_encode %{
 8247     __ lock();
 8248     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 8249   %}
 8250   ins_pipe( pipe_cmpxchg );
 8251 %}
 8252 
 8253 instruct compareAndExchangeS(
 8254                          memory mem_ptr,
 8255                          rax_RegI oldval, rRegI newval,
 8256                          rFlagsReg cr)
 8257 %{
 8258   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 8259   effect(KILL cr);
 8260 
 8261   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8262             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8263   ins_encode %{
 8264     __ lock();
 8265     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 8266   %}
 8267   ins_pipe( pipe_cmpxchg );
 8268 %}
 8269 
 8270 instruct compareAndExchangeI(
 8271                          memory mem_ptr,
 8272                          rax_RegI oldval, rRegI newval,
 8273                          rFlagsReg cr)
 8274 %{
 8275   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 8276   effect(KILL cr);
 8277 
 8278   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8279             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8280   ins_encode %{
 8281     __ lock();
 8282     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8283   %}
 8284   ins_pipe( pipe_cmpxchg );
 8285 %}
 8286 
 8287 instruct compareAndExchangeL(
 8288                          memory mem_ptr,
 8289                          rax_RegL oldval, rRegL newval,
 8290                          rFlagsReg cr)
 8291 %{
 8292   predicate(VM_Version::supports_cx8());
 8293   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 8294   effect(KILL cr);
 8295 
 8296   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8297             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8298   ins_encode %{
 8299     __ lock();
 8300     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8301   %}
 8302   ins_pipe( pipe_cmpxchg );
 8303 %}
 8304 
 8305 instruct compareAndExchangeN(
 8306                           memory mem_ptr,
 8307                           rax_RegN oldval, rRegN newval,
 8308                           rFlagsReg cr) %{
 8309   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
 8310   effect(KILL cr);
 8311 
 8312   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8313             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8314   ins_encode %{
 8315     __ lock();
 8316     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8317   %}
 8318   ins_pipe( pipe_cmpxchg );
 8319 %}
 8320 
 8321 instruct compareAndExchangeP(
 8322                          memory mem_ptr,
 8323                          rax_RegP oldval, rRegP newval,
 8324                          rFlagsReg cr)
 8325 %{
 8326   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8327   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 8328   effect(KILL cr);
 8329 
 8330   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8331             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8332   ins_encode %{
 8333     __ lock();
 8334     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8335   %}
 8336   ins_pipe( pipe_cmpxchg );
 8337 %}
 8338 
 8339 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
 8340   predicate(n->as_LoadStore()->result_not_used());
 8341   match(Set dummy (GetAndAddB mem add));
 8342   effect(KILL cr);
 8343   format %{ "addb_lock   $mem, $add" %}
 8344   ins_encode %{
 8345     __ lock();
 8346     __ addb($mem$$Address, $add$$Register);
 8347   %}
 8348   ins_pipe(pipe_cmpxchg);
 8349 %}
 8350 
 8351 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 8352   predicate(n->as_LoadStore()->result_not_used());
 8353   match(Set dummy (GetAndAddB mem add));
 8354   effect(KILL cr);
 8355   format %{ "addb_lock   $mem, $add" %}
 8356   ins_encode %{
 8357     __ lock();
 8358     __ addb($mem$$Address, $add$$constant);
 8359   %}
 8360   ins_pipe(pipe_cmpxchg);
 8361 %}
 8362 
 8363 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
 8364   predicate(!n->as_LoadStore()->result_not_used());
 8365   match(Set newval (GetAndAddB mem newval));
 8366   effect(KILL cr);
 8367   format %{ "xaddb_lock  $mem, $newval" %}
 8368   ins_encode %{
 8369     __ lock();
 8370     __ xaddb($mem$$Address, $newval$$Register);
 8371   %}
 8372   ins_pipe(pipe_cmpxchg);
 8373 %}
 8374 
 8375 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
 8376   predicate(n->as_LoadStore()->result_not_used());
 8377   match(Set dummy (GetAndAddS mem add));
 8378   effect(KILL cr);
 8379   format %{ "addw_lock   $mem, $add" %}
 8380   ins_encode %{
 8381     __ lock();
 8382     __ addw($mem$$Address, $add$$Register);
 8383   %}
 8384   ins_pipe(pipe_cmpxchg);
 8385 %}
 8386 
 8387 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 8388   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
 8389   match(Set dummy (GetAndAddS mem add));
 8390   effect(KILL cr);
 8391   format %{ "addw_lock   $mem, $add" %}
 8392   ins_encode %{
 8393     __ lock();
 8394     __ addw($mem$$Address, $add$$constant);
 8395   %}
 8396   ins_pipe(pipe_cmpxchg);
 8397 %}
 8398 
 8399 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
 8400   predicate(!n->as_LoadStore()->result_not_used());
 8401   match(Set newval (GetAndAddS mem newval));
 8402   effect(KILL cr);
 8403   format %{ "xaddw_lock  $mem, $newval" %}
 8404   ins_encode %{
 8405     __ lock();
 8406     __ xaddw($mem$$Address, $newval$$Register);
 8407   %}
 8408   ins_pipe(pipe_cmpxchg);
 8409 %}
 8410 
 8411 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
 8412   predicate(n->as_LoadStore()->result_not_used());
 8413   match(Set dummy (GetAndAddI mem add));
 8414   effect(KILL cr);
 8415   format %{ "addl_lock   $mem, $add" %}
 8416   ins_encode %{
 8417     __ lock();
 8418     __ addl($mem$$Address, $add$$Register);
 8419   %}
 8420   ins_pipe(pipe_cmpxchg);
 8421 %}
 8422 
 8423 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 8424   predicate(n->as_LoadStore()->result_not_used());
 8425   match(Set dummy (GetAndAddI mem add));
 8426   effect(KILL cr);
 8427   format %{ "addl_lock   $mem, $add" %}
 8428   ins_encode %{
 8429     __ lock();
 8430     __ addl($mem$$Address, $add$$constant);
 8431   %}
 8432   ins_pipe(pipe_cmpxchg);
 8433 %}
 8434 
 8435 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
 8436   predicate(!n->as_LoadStore()->result_not_used());
 8437   match(Set newval (GetAndAddI mem newval));
 8438   effect(KILL cr);
 8439   format %{ "xaddl_lock  $mem, $newval" %}
 8440   ins_encode %{
 8441     __ lock();
 8442     __ xaddl($mem$$Address, $newval$$Register);
 8443   %}
 8444   ins_pipe(pipe_cmpxchg);
 8445 %}
 8446 
 8447 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
 8448   predicate(n->as_LoadStore()->result_not_used());
 8449   match(Set dummy (GetAndAddL mem add));
 8450   effect(KILL cr);
 8451   format %{ "addq_lock   $mem, $add" %}
 8452   ins_encode %{
 8453     __ lock();
 8454     __ addq($mem$$Address, $add$$Register);
 8455   %}
 8456   ins_pipe(pipe_cmpxchg);
 8457 %}
 8458 
 8459 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
 8460   predicate(n->as_LoadStore()->result_not_used());
 8461   match(Set dummy (GetAndAddL mem add));
 8462   effect(KILL cr);
 8463   format %{ "addq_lock   $mem, $add" %}
 8464   ins_encode %{
 8465     __ lock();
 8466     __ addq($mem$$Address, $add$$constant);
 8467   %}
 8468   ins_pipe(pipe_cmpxchg);
 8469 %}
 8470 
 8471 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
 8472   predicate(!n->as_LoadStore()->result_not_used());
 8473   match(Set newval (GetAndAddL mem newval));
 8474   effect(KILL cr);
 8475   format %{ "xaddq_lock  $mem, $newval" %}
 8476   ins_encode %{
 8477     __ lock();
 8478     __ xaddq($mem$$Address, $newval$$Register);
 8479   %}
 8480   ins_pipe(pipe_cmpxchg);
 8481 %}
 8482 
 8483 instruct xchgB( memory mem, rRegI newval) %{
 8484   match(Set newval (GetAndSetB mem newval));
 8485   format %{ "XCHGB  $newval,[$mem]" %}
 8486   ins_encode %{
 8487     __ xchgb($newval$$Register, $mem$$Address);
 8488   %}
 8489   ins_pipe( pipe_cmpxchg );
 8490 %}
 8491 
 8492 instruct xchgS( memory mem, rRegI newval) %{
 8493   match(Set newval (GetAndSetS mem newval));
 8494   format %{ "XCHGW  $newval,[$mem]" %}
 8495   ins_encode %{
 8496     __ xchgw($newval$$Register, $mem$$Address);
 8497   %}
 8498   ins_pipe( pipe_cmpxchg );
 8499 %}
 8500 
 8501 instruct xchgI( memory mem, rRegI newval) %{
 8502   match(Set newval (GetAndSetI mem newval));
 8503   format %{ "XCHGL  $newval,[$mem]" %}
 8504   ins_encode %{
 8505     __ xchgl($newval$$Register, $mem$$Address);
 8506   %}
 8507   ins_pipe( pipe_cmpxchg );
 8508 %}
 8509 
 8510 instruct xchgL( memory mem, rRegL newval) %{
 8511   match(Set newval (GetAndSetL mem newval));
 8512   format %{ "XCHGL  $newval,[$mem]" %}
 8513   ins_encode %{
 8514     __ xchgq($newval$$Register, $mem$$Address);
 8515   %}
 8516   ins_pipe( pipe_cmpxchg );
 8517 %}
 8518 
 8519 instruct xchgP( memory mem, rRegP newval) %{
 8520   match(Set newval (GetAndSetP mem newval));
 8521   predicate(n->as_LoadStore()->barrier_data() == 0);
 8522   format %{ "XCHGQ  $newval,[$mem]" %}
 8523   ins_encode %{
 8524     __ xchgq($newval$$Register, $mem$$Address);
 8525   %}
 8526   ins_pipe( pipe_cmpxchg );
 8527 %}
 8528 
 8529 instruct xchgN( memory mem, rRegN newval) %{
 8530   match(Set newval (GetAndSetN mem newval));
 8531   format %{ "XCHGL  $newval,$mem]" %}
 8532   ins_encode %{
 8533     __ xchgl($newval$$Register, $mem$$Address);
 8534   %}
 8535   ins_pipe( pipe_cmpxchg );
 8536 %}
 8537 
 8538 //----------Abs Instructions-------------------------------------------
 8539 
 8540 // Integer Absolute Instructions
 8541 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8542 %{
 8543   match(Set dst (AbsI src));
 8544   effect(TEMP dst, KILL cr);
 8545   format %{ "xorl    $dst, $dst\t# abs int\n\t"
 8546             "subl    $dst, $src\n\t"
 8547             "cmovll  $dst, $src" %}
 8548   ins_encode %{
 8549     __ xorl($dst$$Register, $dst$$Register);
 8550     __ subl($dst$$Register, $src$$Register);
 8551     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
 8552   %}
 8553 
 8554   ins_pipe(ialu_reg_reg);
 8555 %}
 8556 
 8557 // Long Absolute Instructions
 8558 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8559 %{
        // Matches AbsL. Emits: xorl of dst with itself, subq dst, src
        // (dst = -src), then a 64-bit conditional move of src into dst on the
        // "less" condition. dst is declared TEMP and cr is declared killed.
 8560   match(Set dst (AbsL src));
 8561   effect(TEMP dst, KILL cr);
 8562   format %{ "xorl    $dst, $dst\t# abs long\n\t"
 8563             "subq    $dst, $src\n\t"
 8564             "cmovlq  $dst, $src" %}
 8565   ins_encode %{
 8566     __ xorl($dst$$Register, $dst$$Register);
 8567     __ subq($dst$$Register, $src$$Register);
 8568     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
 8569   %}
 8570
 8571   ins_pipe(ialu_reg_reg);
 8572 %}
 8573 
 8574 //----------Subtraction Instructions-------------------------------------------
 8575 
 8576 // Integer Subtraction Instructions
 8577 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8578 %{
        // Matches SubI dst src with the result in dst (two-operand form);
        // emits subl dst, src. The flags operand cr is declared killed.
 8579   match(Set dst (SubI dst src));
 8580   effect(KILL cr);
 8581
 8582   format %{ "subl    $dst, $src\t# int" %}
 8583   ins_encode %{
 8584     __ subl($dst$$Register, $src$$Register);
 8585   %}
 8586   ins_pipe(ialu_reg_reg);
 8587 %}
 8588 
 8589 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 8590 %{
        // Matches SubI dst (LoadI src): folds the int load into the subtract
        // and emits subl with a memory source. cr is declared killed.
 8591   match(Set dst (SubI dst (LoadI src)));
 8592   effect(KILL cr);
 8593
 8594   ins_cost(150);
 8595   format %{ "subl    $dst, $src\t# int" %}
 8596   ins_encode %{
 8597     __ subl($dst$$Register, $src$$Address);
 8598   %}
 8599   ins_pipe(ialu_reg_mem);
 8600 %}
 8601 
 8602 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 8603 %{
        // Matches StoreI dst (SubI (LoadI dst) src): a load/subtract/store on
        // the same memory operand, emitted as a single subl with a memory
        // destination. cr is declared killed.
 8604   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 8605   effect(KILL cr);
 8606
 8607   ins_cost(150);
 8608   format %{ "subl    $dst, $src\t# int" %}
 8609   ins_encode %{
 8610     __ subl($dst$$Address, $src$$Register);
 8611   %}
 8612   ins_pipe(ialu_mem_reg);
 8613 %}
 8614 
 8615 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8616 %{
        // Matches SubL dst src with the result in dst; emits subq dst, src.
        // The flags operand cr is declared killed.
 8617   match(Set dst (SubL dst src));
 8618   effect(KILL cr);
 8619
 8620   format %{ "subq    $dst, $src\t# long" %}
 8621   ins_encode %{
 8622     __ subq($dst$$Register, $src$$Register);
 8623   %}
 8624   ins_pipe(ialu_reg_reg);
 8625 %}
 8626 
 8627 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 8628 %{
        // Matches SubL dst (LoadL src): folds the long load into the subtract
        // and emits subq with a memory source. cr is declared killed.
 8629   match(Set dst (SubL dst (LoadL src)));
 8630   effect(KILL cr);
 8631
 8632   ins_cost(150);
 8633   format %{ "subq    $dst, $src\t# long" %}
 8634   ins_encode %{
 8635     __ subq($dst$$Register, $src$$Address);
 8636   %}
 8637   ins_pipe(ialu_reg_mem);
 8638 %}
 8639 
 8640 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 8641 %{
        // Matches StoreL dst (SubL (LoadL dst) src): a load/subtract/store on
        // the same memory operand, emitted as a single subq with a memory
        // destination. cr is declared killed.
 8642   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
 8643   effect(KILL cr);
 8644
 8645   ins_cost(150);
 8646   format %{ "subq    $dst, $src\t# long" %}
 8647   ins_encode %{
 8648     __ subq($dst$$Address, $src$$Register);
 8649   %}
 8650   ins_pipe(ialu_mem_reg);
 8651 %}
 8652 
 8653 // Subtract from a pointer
 8654 // XXX hmpf???
 8655 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
 8656 %{
        // Matches AddP dst (SubI zero src), i.e. a pointer plus a negated int,
        // and prints it as a subq of src from dst. Unlike its neighbours this
        // rule is encoded with opcode 0x2B through the REX_reg_reg_wide, OpcP
        // and reg_reg encoding classes, which are defined outside this chunk.
 8657   match(Set dst (AddP dst (SubI zero src)));
 8658   effect(KILL cr);
 8659
 8660   format %{ "subq    $dst, $src\t# ptr - int" %}
 8661   opcode(0x2B);
 8662   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
 8663   ins_pipe(ialu_reg_reg);
 8664 %}
 8665 
 8666 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
 8667 %{
        // Matches SubI zero dst (zero is an immI_0 operand) and emits negl on
        // dst in place. The flags operand cr is declared killed.
 8668   match(Set dst (SubI zero dst));
 8669   effect(KILL cr);
 8670
 8671   format %{ "negl    $dst\t# int" %}
 8672   ins_encode %{
 8673     __ negl($dst$$Register);
 8674   %}
 8675   ins_pipe(ialu_reg);
 8676 %}
 8677 
 8678 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
 8679 %{
        // Matches the NegI node directly (negI_rReg handles the SubI-from-zero
        // shape) and emits negl on dst in place. cr is declared killed.
 8680   match(Set dst (NegI dst));
 8681   effect(KILL cr);
 8682
 8683   format %{ "negl    $dst\t# int" %}
 8684   ins_encode %{
 8685     __ negl($dst$$Register);
 8686   %}
 8687   ins_pipe(ialu_reg);
 8688 %}
 8689 
 8690 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
 8691 %{
        // Matches StoreI dst (SubI zero (LoadI dst)): negates an int in
        // memory with a single negl on the memory operand. cr is declared
        // killed.
 8692   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
 8693   effect(KILL cr);
 8694
 8695   format %{ "negl    $dst\t# int" %}
 8696   ins_encode %{
 8697     __ negl($dst$$Address);
 8698   %}
 8699   ins_pipe(ialu_reg);
 8700 %}
 8701 
 8702 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
 8703 %{
        // Matches SubL zero dst (zero is an immL0 operand) and emits negq on
        // dst in place. The flags operand cr is declared killed.
 8704   match(Set dst (SubL zero dst));
 8705   effect(KILL cr);
 8706
 8707   format %{ "negq    $dst\t# long" %}
 8708   ins_encode %{
 8709     __ negq($dst$$Register);
 8710   %}
 8711   ins_pipe(ialu_reg);
 8712 %}
 8713 
 8714 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
 8715 %{
        // Matches the NegL node directly (negL_rReg handles the SubL-from-zero
        // shape) and emits negq on dst in place. cr is declared killed.
        // The format annotation reads "long" to agree with the rRegL operand
        // and with the other negq rules.
 8716   match(Set dst (NegL dst));
 8717   effect(KILL cr);
 8718
 8719   format %{ "negq    $dst\t# long" %}
 8720   ins_encode %{
 8721     __ negq($dst$$Register);
 8722   %}
 8723   ins_pipe(ialu_reg);
 8724 %}
 8725 
 8726 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
 8727 %{
        // Matches StoreL dst (SubL zero (LoadL dst)): negates a long in
        // memory with a single negq on the memory operand. cr is declared
        // killed.
 8728   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
 8729   effect(KILL cr);
 8730
 8731   format %{ "negq    $dst\t# long" %}
 8732   ins_encode %{
 8733     __ negq($dst$$Address);
 8734   %}
 8735   ins_pipe(ialu_reg);
 8736 %}
 8737 
 8738 //----------Multiplication/Division Instructions-------------------------------
 8739 // Integer Multiplication Instructions
 8740 // Multiply Register
 8741 
 8742 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8743 %{
        // Matches MulI dst src with the result in dst; emits the two-operand
        // imull dst, src. cr is declared killed.
 8744   match(Set dst (MulI dst src));
 8745   effect(KILL cr);
 8746
 8747   ins_cost(300);
 8748   format %{ "imull   $dst, $src\t# int" %}
 8749   ins_encode %{
 8750     __ imull($dst$$Register, $src$$Register);
 8751   %}
 8752   ins_pipe(ialu_reg_reg_alu0);
 8753 %}
 8754 
 8755 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
 8756 %{
        // Matches MulI src imm; emits the three-operand imull dst, src, imm,
        // so dst is a separate operand from src. cr is declared killed.
 8757   match(Set dst (MulI src imm));
 8758   effect(KILL cr);
 8759
 8760   ins_cost(300);
 8761   format %{ "imull   $dst, $src, $imm\t# int" %}
 8762   ins_encode %{
 8763     __ imull($dst$$Register, $src$$Register, $imm$$constant);
 8764   %}
 8765   ins_pipe(ialu_reg_reg_alu0);
 8766 %}
 8767 
 8768 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
 8769 %{
        // Matches MulI dst (LoadI src): folds the int load into imull with a
        // memory source. cr is declared killed.
 8770   match(Set dst (MulI dst (LoadI src)));
 8771   effect(KILL cr);
 8772
 8773   ins_cost(350);
 8774   format %{ "imull   $dst, $src\t# int" %}
 8775   ins_encode %{
 8776     __ imull($dst$$Register, $src$$Address);
 8777   %}
 8778   ins_pipe(ialu_reg_mem_alu0);
 8779 %}
 8780 
 8781 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
 8782 %{
        // Matches MulI (LoadI src) imm; emits the three-operand imull with a
        // memory source and an immediate. cr is declared killed.
 8783   match(Set dst (MulI (LoadI src) imm));
 8784   effect(KILL cr);
 8785
 8786   ins_cost(300);
 8787   format %{ "imull   $dst, $src, $imm\t# int" %}
 8788   ins_encode %{
 8789     __ imull($dst$$Register, $src$$Address, $imm$$constant);
 8790   %}
 8791   ins_pipe(ialu_reg_mem_alu0);
 8792 %}
 8793 
 8794 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
 8795 %{
        // Matches MulAddS2I (Binary dst src1) (Binary src2 src3) and has no
        // encoding of its own: it expands into
        //   mulI_rReg(dst, src1); mulI_rReg(src2, src3); addI_rReg(dst, src2)
        // i.e. dst = dst*src1 + src2*src3. src2 is overwritten by the second
        // multiply, hence KILL src2 in addition to KILL cr.
 8796   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 8797   effect(KILL cr, KILL src2);
 8798
 8799   expand %{ mulI_rReg(dst, src1, cr);
 8800            mulI_rReg(src2, src3, cr);
 8801            addI_rReg(dst, src2, cr); %}
 8802 %}
 8803 
 8804 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8805 %{
        // Matches MulL dst src with the result in dst; emits the two-operand
        // imulq dst, src. cr is declared killed.
 8806   match(Set dst (MulL dst src));
 8807   effect(KILL cr);
 8808
 8809   ins_cost(300);
 8810   format %{ "imulq   $dst, $src\t# long" %}
 8811   ins_encode %{
 8812     __ imulq($dst$$Register, $src$$Register);
 8813   %}
 8814   ins_pipe(ialu_reg_reg_alu0);
 8815 %}
 8816 
 8817 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
 8818 %{
        // Matches MulL src imm where imm is an immL32 operand; emits the
        // three-operand imulq dst, src, imm. cr is declared killed.
 8819   match(Set dst (MulL src imm));
 8820   effect(KILL cr);
 8821
 8822   ins_cost(300);
 8823   format %{ "imulq   $dst, $src, $imm\t# long" %}
 8824   ins_encode %{
 8825     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
 8826   %}
 8827   ins_pipe(ialu_reg_reg_alu0);
 8828 %}
 8829 
 8830 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
 8831 %{
        // Matches MulL dst (LoadL src): folds the long load into imulq with a
        // memory source. cr is declared killed.
 8832   match(Set dst (MulL dst (LoadL src)));
 8833   effect(KILL cr);
 8834
 8835   ins_cost(350);
 8836   format %{ "imulq   $dst, $src\t# long" %}
 8837   ins_encode %{
 8838     __ imulq($dst$$Register, $src$$Address);
 8839   %}
 8840   ins_pipe(ialu_reg_mem_alu0);
 8841 %}
 8842 
 8843 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
 8844 %{
        // Matches MulL (LoadL src) imm where imm is an immL32 operand; emits
        // the three-operand imulq with a memory source. cr is declared killed.
 8845   match(Set dst (MulL (LoadL src) imm));
 8846   effect(KILL cr);
 8847
 8848   ins_cost(300);
 8849   format %{ "imulq   $dst, $src, $imm\t# long" %}
 8850   ins_encode %{
 8851     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
 8852   %}
 8853   ins_pipe(ialu_reg_mem_alu0);
 8854 %}
 8855 
 8856 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 8857 %{
        // Matches MulHiL src rax. Emits the one-operand imulq on src; per the
        // format text the product is RDX:RAX and the result operand dst is
        // constrained to the rdx_RegL class. rax is both read and clobbered
        // (USE_KILL); cr is declared killed.
 8858   match(Set dst (MulHiL src rax));
 8859   effect(USE_KILL rax, KILL cr);
 8860
 8861   ins_cost(300);
 8862   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
 8863   ins_encode %{
 8864     __ imulq($src$$Register);
 8865   %}
 8866   ins_pipe(ialu_reg_reg_alu0);
 8867 %}
 8868 
 8869 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 8870 %{
        // Matches UMulHiL src rax. Emits the one-operand mulq on src; per the
        // format text the product is RDX:RAX and the result operand dst is
        // constrained to the rdx_RegL class. rax is both read and clobbered
        // (USE_KILL); cr is declared killed.
 8871   match(Set dst (UMulHiL src rax));
 8872   effect(USE_KILL rax, KILL cr);
 8873
 8874   ins_cost(300);
 8875   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
 8876   ins_encode %{
 8877     __ mulq($src$$Register);
 8878   %}
 8879   ins_pipe(ialu_reg_reg_alu0);
 8880 %}
 8881 
 8882 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
 8883                    rFlagsReg cr)
 8884 %{
        // Matches DivI rax div with the quotient in rax; rdx and cr are
        // declared killed, and div is constrained to the no_rax_rdx_RegI class.
        // The format text describes a guard for rax == 0x80000000 with
        // div == -1 that skips the idivl, otherwise cdql + idivl. The bytes
        // themselves come from the cdql_enc encoding class, which is defined
        // outside this chunk.
 8885   match(Set rax (DivI rax div));
 8886   effect(KILL rdx, KILL cr);
 8887
 8888   ins_cost(30*100+10*100); // XXX
 8889   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
 8890             "jne,s   normal\n\t"
 8891             "xorl    rdx, rdx\n\t"
 8892             "cmpl    $div, -1\n\t"
 8893             "je,s    done\n"
 8894     "normal: cdql\n\t"
 8895             "idivl   $div\n"
 8896     "done:"        %}
 8897   ins_encode(cdql_enc(div));
 8898   ins_pipe(ialu_reg_reg_alu0);
 8899 %}
 8900 
 8901 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
 8902                    rFlagsReg cr)
 8903 %{
        // Matches DivL rax div with the quotient in rax; rdx and cr are
        // declared killed, and div is constrained to the no_rax_rdx_RegL class.
        // The format text describes a guard for rax == 0x8000000000000000
        // with div == -1 that skips the idivq, otherwise cdqq + idivq. The
        // bytes themselves come from the cdqq_enc encoding class, which is
        // defined outside this chunk.
 8904   match(Set rax (DivL rax div));
 8905   effect(KILL rdx, KILL cr);
 8906
 8907   ins_cost(30*100+10*100); // XXX
 8908   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
 8909             "cmpq    rax, rdx\n\t"
 8910             "jne,s   normal\n\t"
 8911             "xorl    rdx, rdx\n\t"
 8912             "cmpq    $div, -1\n\t"
 8913             "je,s    done\n"
 8914     "normal: cdqq\n\t"
 8915             "idivq   $div\n"
 8916     "done:"        %}
 8917   ins_encode(cdqq_enc(div));
 8918   ins_pipe(ialu_reg_reg_alu0);
 8919 %}
 8920 
 8921 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
 8922 %{
        // Matches UDivI rax div with the quotient in rax; rdx and cr are
        // declared killed. Code is produced by the udivI assembler helper
        // (defined outside this chunk), which receives rax, div and rdx.
 8923   match(Set rax (UDivI rax div));
 8924   effect(KILL rdx, KILL cr);
 8925
 8926   ins_cost(300);
 8927   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
 8928   ins_encode %{
 8929     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
 8930   %}
 8931   ins_pipe(ialu_reg_reg_alu0);
 8932 %}
 8933 
 8934 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
 8935 %{
        // Matches UDivL rax div with the quotient in rax; rdx and cr are
        // declared killed. Code is produced by the udivL assembler helper
        // (defined outside this chunk), which receives rax, div and rdx.
 8936   match(Set rax (UDivL rax div));
 8937   effect(KILL rdx, KILL cr);
 8938
 8939   ins_cost(300);
 8940   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
 8941   ins_encode %{
 8942      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
 8943   %}
 8944   ins_pipe(ialu_reg_reg_alu0);
 8945 %}
 8946 
 8947 // Integer DIVMOD with Register, both quotient and mod results
 8948 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
 8949                              rFlagsReg cr)
 8950 %{
        // Matches DivModI rax div (no Set form in the match rule). Only cr is
        // declared killed here; rdx is not killed, unlike in divI_rReg.
        // Shares the cdql_enc encoding class (defined outside this chunk)
        // and the same guarded cdql + idivl format text as divI_rReg.
 8951   match(DivModI rax div);
 8952   effect(KILL cr);
 8953
 8954   ins_cost(30*100+10*100); // XXX
 8955   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
 8956             "jne,s   normal\n\t"
 8957             "xorl    rdx, rdx\n\t"
 8958             "cmpl    $div, -1\n\t"
 8959             "je,s    done\n"
 8960     "normal: cdql\n\t"
 8961             "idivl   $div\n"
 8962     "done:"        %}
 8963   ins_encode(cdql_enc(div));
 8964   ins_pipe(pipe_slow);
 8965 %}
 8966 
 8967 // Long DIVMOD with Register, both quotient and mod results
 8968 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
 8969                              rFlagsReg cr)
 8970 %{
        // Matches DivModL rax div (no Set form in the match rule). Only cr is
        // declared killed here; rdx is not killed, unlike in divL_rReg.
        // Shares the cdqq_enc encoding class (defined outside this chunk)
        // and the same guarded cdqq + idivq format text as divL_rReg.
 8971   match(DivModL rax div);
 8972   effect(KILL cr);
 8973
 8974   ins_cost(30*100+10*100); // XXX
 8975   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
 8976             "cmpq    rax, rdx\n\t"
 8977             "jne,s   normal\n\t"
 8978             "xorl    rdx, rdx\n\t"
 8979             "cmpq    $div, -1\n\t"
 8980             "je,s    done\n"
 8981     "normal: cdqq\n\t"
 8982             "idivq   $div\n"
 8983     "done:"        %}
 8984   ins_encode(cdqq_enc(div));
 8985   ins_pipe(pipe_slow);
 8986 %}
 8987 
 8988 // Unsigned integer DIVMOD with Register, both quotient and mod results
 8989 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
 8990                               no_rax_rdx_RegI div, rFlagsReg cr)
 8991 %{
        // Matches UDivModI rax div (no Set form in the match rule). tmp is a
        // TEMP register from the no_rax_rdx_RegI class and cr is declared
        // killed. Code is produced by the udivmodI assembler helper (defined
        // outside this chunk), which receives rax, div, rdx and tmp.
 8992   match(UDivModI rax div);
 8993   effect(TEMP tmp, KILL cr);
 8994
 8995   ins_cost(300);
 8996   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
 8997             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
 8998           %}
 8999   ins_encode %{
 9000     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
 9001   %}
 9002   ins_pipe(pipe_slow);
 9003 %}
 9004 
 9005 // Unsigned long DIVMOD with Register, both quotient and mod results
 9006 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
 9007                               no_rax_rdx_RegL div, rFlagsReg cr)
 9008 %{
        // Matches UDivModL rax div (no Set form in the match rule). tmp is a
        // TEMP register from the no_rax_rdx_RegL class and cr is declared
        // killed. Code is produced by the udivmodL assembler helper (defined
        // outside this chunk), which receives rax, div, rdx and tmp.
 9009   match(UDivModL rax div);
 9010   effect(TEMP tmp, KILL cr);
 9011
 9012   ins_cost(300);
 9013   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
 9014             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
 9015           %}
 9016   ins_encode %{
 9017     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
 9018   %}
 9019   ins_pipe(pipe_slow);
 9020 %}
 9021 
 9022 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
 9023                    rFlagsReg cr)
 9024 %{
        // Matches ModI rax div with the remainder in rdx; rax and cr are
        // declared killed. Uses the same cdql_enc encoding class (defined
        // outside this chunk) and guarded cdql + idivl format text as
        // divI_rReg; only the result register differs.
 9025   match(Set rdx (ModI rax div));
 9026   effect(KILL rax, KILL cr);
 9027
 9028   ins_cost(300); // XXX
 9029   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
 9030             "jne,s   normal\n\t"
 9031             "xorl    rdx, rdx\n\t"
 9032             "cmpl    $div, -1\n\t"
 9033             "je,s    done\n"
 9034     "normal: cdql\n\t"
 9035             "idivl   $div\n"
 9036     "done:"        %}
 9037   ins_encode(cdql_enc(div));
 9038   ins_pipe(ialu_reg_reg_alu0);
 9039 %}
 9040 
 9041 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
 9042                    rFlagsReg cr)
 9043 %{
        // Matches ModL rax div with the remainder in rdx; rax and cr are
        // declared killed. Uses the same cdqq_enc encoding class (defined
        // outside this chunk) and guarded cdqq + idivq format text as
        // divL_rReg; only the result register differs.
 9044   match(Set rdx (ModL rax div));
 9045   effect(KILL rax, KILL cr);
 9046
 9047   ins_cost(300); // XXX
 9048   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
 9049             "cmpq    rax, rdx\n\t"
 9050             "jne,s   normal\n\t"
 9051             "xorl    rdx, rdx\n\t"
 9052             "cmpq    $div, -1\n\t"
 9053             "je,s    done\n"
 9054     "normal: cdqq\n\t"
 9055             "idivq   $div\n"
 9056     "done:"        %}
 9057   ins_encode(cdqq_enc(div));
 9058   ins_pipe(ialu_reg_reg_alu0);
 9059 %}
 9060 
 9061 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
 9062 %{
        // Matches UModI rax div with the remainder in rdx; rax and cr are
        // declared killed. Code is produced by the umodI assembler helper
        // (defined outside this chunk), which receives rax, div and rdx.
 9063   match(Set rdx (UModI rax div));
 9064   effect(KILL rax, KILL cr);
 9065
 9066   ins_cost(300);
 9067   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
 9068   ins_encode %{
 9069     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
 9070   %}
 9071   ins_pipe(ialu_reg_reg_alu0);
 9072 %}
 9073 
 9074 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
 9075 %{
        // Matches UModL rax div with the remainder in rdx; rax and cr are
        // declared killed. Code is produced by the umodL assembler helper
        // (defined outside this chunk), which receives rax, div and rdx.
 9076   match(Set rdx (UModL rax div));
 9077   effect(KILL rax, KILL cr);
 9078
 9079   ins_cost(300);
 9080   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
 9081   ins_encode %{
 9082     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
 9083   %}
 9084   ins_pipe(ialu_reg_reg_alu0);
 9085 %}
 9086 
 9087 // Integer Shift Instructions
 9088 // Shift Left by one, two, three
 9089 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
 9090 %{
        // Matches LShiftI dst shift where shift is an immI2 operand; emits
        // sall dst, shift in place. cr is declared killed.
 9091   match(Set dst (LShiftI dst shift));
 9092   effect(KILL cr);
 9093
 9094   format %{ "sall    $dst, $shift" %}
 9095   ins_encode %{
 9096     __ sall($dst$$Register, $shift$$constant);
 9097   %}
 9098   ins_pipe(ialu_reg);
 9099 %}
 9100 
 9101 // Shift Left by 8-bit immediate
 9102 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9103 %{
        // Matches LShiftI dst shift where shift is an immI8 operand; emits
        // sall dst, shift in place. cr is declared killed.
 9104   match(Set dst (LShiftI dst shift));
 9105   effect(KILL cr);
 9106
 9107   format %{ "sall    $dst, $shift" %}
 9108   ins_encode %{
 9109     __ sall($dst$$Register, $shift$$constant);
 9110   %}
 9111   ins_pipe(ialu_reg);
 9112 %}
 9113 
 9114 // Shift Left by 8-bit immediate
 9115 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9116 %{
        // Matches StoreI dst (LShiftI (LoadI dst) shift): shifts an int in
        // memory left by an immI8 count with one sall on the memory operand.
        // cr is declared killed.
 9117   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 9118   effect(KILL cr);
 9119
 9120   format %{ "sall    $dst, $shift" %}
 9121   ins_encode %{
 9122     __ sall($dst$$Address, $shift$$constant);
 9123   %}
 9124   ins_pipe(ialu_mem_imm);
 9125 %}
 9126 
 9127 // Shift Left by variable
 9128 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9129 %{
        // Variable-count int left shift, selected only when
        // VM_Version::supports_bmi2() is false. The count operand is
        // constrained to the rcx_RegI class and is not passed to the emitted
        // sall, which takes dst only. cr is declared killed.
 9130   predicate(!VM_Version::supports_bmi2());
 9131   match(Set dst (LShiftI dst shift));
 9132   effect(KILL cr);
 9133
 9134   format %{ "sall    $dst, $shift" %}
 9135   ins_encode %{
 9136     __ sall($dst$$Register);
 9137   %}
 9138   ins_pipe(ialu_reg_reg);
 9139 %}
 9140 
 9141 // Shift Left by variable
 9142 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9143 %{
        // Variable-count int left shift of a memory operand, selected only
        // when VM_Version::supports_bmi2() is false. Matches
        // StoreI dst (LShiftI (LoadI dst) shift); the count is constrained to
        // the rcx_RegI class and the emitted sall takes the address only.
 9144   predicate(!VM_Version::supports_bmi2());
 9145   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 9146   effect(KILL cr);
 9147
 9148   format %{ "sall    $dst, $shift" %}
 9149   ins_encode %{
 9150     __ sall($dst$$Address);
 9151   %}
 9152   ins_pipe(ialu_mem_reg);
 9153 %}
 9154 
 9155 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9156 %{
        // Variable-count int left shift, selected only when
        // VM_Version::supports_bmi2() is true. Emits the three-operand shlxl,
        // so dst, src and shift are independent registers; no flags operand
        // is declared for this rule.
 9157   predicate(VM_Version::supports_bmi2());
 9158   match(Set dst (LShiftI src shift));
 9159
 9160   format %{ "shlxl   $dst, $src, $shift" %}
 9161   ins_encode %{
 9162     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
 9163   %}
 9164   ins_pipe(ialu_reg_reg);
 9165 %}
 9166 
 9167 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9168 %{
        // Selected only when VM_Version::supports_bmi2() is true. Matches
        // LShiftI (LoadI src) shift and folds the load into shlxl with a
        // memory source; no flags operand is declared for this rule.
 9169   predicate(VM_Version::supports_bmi2());
 9170   match(Set dst (LShiftI (LoadI src) shift));
 9171   ins_cost(175);
 9172   format %{ "shlxl   $dst, $src, $shift" %}
 9173   ins_encode %{
 9174     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
 9175   %}
 9176   ins_pipe(ialu_reg_mem);
 9177 %}
 9178 
 9179 // Arithmetic Shift Right by 8-bit immediate
 9180 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9181 %{
        // Matches RShiftI dst shift where shift is an immI8 operand; emits
        // sarl dst, shift in place. cr is declared killed.
        // NOTE(review): this register form uses the ialu_mem_imm pipe class,
        // whereas the sibling shrI_rReg_imm uses ialu_reg - confirm whether
        // that is intentional.
 9182   match(Set dst (RShiftI dst shift));
 9183   effect(KILL cr);
 9184
 9185   format %{ "sarl    $dst, $shift" %}
 9186   ins_encode %{
 9187     __ sarl($dst$$Register, $shift$$constant);
 9188   %}
 9189   ins_pipe(ialu_mem_imm);
 9190 %}
 9191 
 9192 // Arithmetic Shift Right by 8-bit immediate
 9193 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9194 %{
        // Matches StoreI dst (RShiftI (LoadI dst) shift): arithmetic right
        // shift of an int in memory by an immI8 count with one sarl on the
        // memory operand. cr is declared killed.
 9195   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 9196   effect(KILL cr);
 9197
 9198   format %{ "sarl    $dst, $shift" %}
 9199   ins_encode %{
 9200     __ sarl($dst$$Address, $shift$$constant);
 9201   %}
 9202   ins_pipe(ialu_mem_imm);
 9203 %}
 9204 
 9205 // Arithmetic Shift Right by variable
 9206 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9207 %{
        // Variable-count int arithmetic right shift, selected only when
        // VM_Version::supports_bmi2() is false. The count operand is
        // constrained to the rcx_RegI class and the emitted sarl takes dst
        // only. cr is declared killed.
 9208   predicate(!VM_Version::supports_bmi2());
 9209   match(Set dst (RShiftI dst shift));
 9210   effect(KILL cr);
 9211   format %{ "sarl    $dst, $shift" %}
 9212   ins_encode %{
 9213     __ sarl($dst$$Register);
 9214   %}
 9215   ins_pipe(ialu_reg_reg);
 9216 %}
 9217 
 9218 // Arithmetic Shift Right by variable
 9219 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9220 %{
        // Variable-count int arithmetic right shift of a memory operand,
        // selected only when VM_Version::supports_bmi2() is false. Matches
        // StoreI dst (RShiftI (LoadI dst) shift); the count is constrained to
        // the rcx_RegI class and the emitted sarl takes the address only.
 9221   predicate(!VM_Version::supports_bmi2());
 9222   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 9223   effect(KILL cr);
 9224
 9225   format %{ "sarl    $dst, $shift" %}
 9226   ins_encode %{
 9227     __ sarl($dst$$Address);
 9228   %}
 9229   ins_pipe(ialu_mem_reg);
 9230 %}
 9231 
 9232 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9233 %{
        // Variable-count int arithmetic right shift, selected only when
        // VM_Version::supports_bmi2() is true. Emits the three-operand sarxl;
        // no flags operand is declared for this rule.
 9234   predicate(VM_Version::supports_bmi2());
 9235   match(Set dst (RShiftI src shift));
 9236
 9237   format %{ "sarxl   $dst, $src, $shift" %}
 9238   ins_encode %{
 9239     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
 9240   %}
 9241   ins_pipe(ialu_reg_reg);
 9242 %}
 9243 
 9244 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9245 %{
        // Selected only when VM_Version::supports_bmi2() is true. Matches
        // RShiftI (LoadI src) shift and folds the load into sarxl with a
        // memory source; no flags operand is declared for this rule.
 9246   predicate(VM_Version::supports_bmi2());
 9247   match(Set dst (RShiftI (LoadI src) shift));
 9248   ins_cost(175);
 9249   format %{ "sarxl   $dst, $src, $shift" %}
 9250   ins_encode %{
 9251     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
 9252   %}
 9253   ins_pipe(ialu_reg_mem);
 9254 %}
 9255 
 9256 // Logical Shift Right by 8-bit immediate
 9257 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9258 %{
        // Matches URShiftI dst shift where shift is an immI8 operand; emits
        // shrl dst, shift in place. cr is declared killed.
 9259   match(Set dst (URShiftI dst shift));
 9260   effect(KILL cr);
 9261
 9262   format %{ "shrl    $dst, $shift" %}
 9263   ins_encode %{
 9264     __ shrl($dst$$Register, $shift$$constant);
 9265   %}
 9266   ins_pipe(ialu_reg);
 9267 %}
 9268 
 9269 // Logical Shift Right by 8-bit immediate
 9270 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9271 %{
        // Matches StoreI dst (URShiftI (LoadI dst) shift): logical right
        // shift of an int in memory by an immI8 count with one shrl on the
        // memory operand. cr is declared killed.
 9272   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 9273   effect(KILL cr);
 9274
 9275   format %{ "shrl    $dst, $shift" %}
 9276   ins_encode %{
 9277     __ shrl($dst$$Address, $shift$$constant);
 9278   %}
 9279   ins_pipe(ialu_mem_imm);
 9280 %}
 9281 
 9282 // Logical Shift Right by variable
 9283 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9284 %{
        // Variable-count int logical right shift, selected only when
        // VM_Version::supports_bmi2() is false. The count operand is
        // constrained to the rcx_RegI class and the emitted shrl takes dst
        // only. cr is declared killed.
 9285   predicate(!VM_Version::supports_bmi2());
 9286   match(Set dst (URShiftI dst shift));
 9287   effect(KILL cr);
 9288
 9289   format %{ "shrl    $dst, $shift" %}
 9290   ins_encode %{
 9291     __ shrl($dst$$Register);
 9292   %}
 9293   ins_pipe(ialu_reg_reg);
 9294 %}
 9295 
 9296 // Logical Shift Right by variable
 9297 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9298 %{
        // Variable-count int logical right shift of a memory operand,
        // selected only when VM_Version::supports_bmi2() is false. Matches
        // StoreI dst (URShiftI (LoadI dst) shift); the count is constrained
        // to the rcx_RegI class and the emitted shrl takes the address only.
 9299   predicate(!VM_Version::supports_bmi2());
 9300   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 9301   effect(KILL cr);
 9302
 9303   format %{ "shrl    $dst, $shift" %}
 9304   ins_encode %{
 9305     __ shrl($dst$$Address);
 9306   %}
 9307   ins_pipe(ialu_mem_reg);
 9308 %}
 9309 
 9310 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9311 %{
        // Variable-count int logical right shift, selected only when
        // VM_Version::supports_bmi2() is true. Emits the three-operand shrxl;
        // no flags operand is declared for this rule.
 9312   predicate(VM_Version::supports_bmi2());
 9313   match(Set dst (URShiftI src shift));
 9314
 9315   format %{ "shrxl   $dst, $src, $shift" %}
 9316   ins_encode %{
 9317     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
 9318   %}
 9319   ins_pipe(ialu_reg_reg);
 9320 %}
 9321 
 9322 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9323 %{
        // Selected only when VM_Version::supports_bmi2() is true. Matches
        // URShiftI (LoadI src) shift and folds the load into shrxl with a
        // memory source; no flags operand is declared for this rule.
 9324   predicate(VM_Version::supports_bmi2());
 9325   match(Set dst (URShiftI (LoadI src) shift));
 9326   ins_cost(175);
 9327   format %{ "shrxl   $dst, $src, $shift" %}
 9328   ins_encode %{
 9329     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
 9330   %}
 9331   ins_pipe(ialu_reg_mem);
 9332 %}
 9333 
 9334 // Long Shift Instructions
 9335 // Shift Left by one, two, three
 9336 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
 9337 %{
        // Matches LShiftL dst shift where shift is an immI2 operand; emits
        // salq dst, shift in place. cr is declared killed.
 9338   match(Set dst (LShiftL dst shift));
 9339   effect(KILL cr);
 9340
 9341   format %{ "salq    $dst, $shift" %}
 9342   ins_encode %{
 9343     __ salq($dst$$Register, $shift$$constant);
 9344   %}
 9345   ins_pipe(ialu_reg);
 9346 %}
 9347 
 9348 // Shift Left by 8-bit immediate
 9349 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 9350 %{
        // Matches LShiftL dst shift where shift is an immI8 operand; emits
        // salq dst, shift in place. cr is declared killed.
 9351   match(Set dst (LShiftL dst shift));
 9352   effect(KILL cr);
 9353
 9354   format %{ "salq    $dst, $shift" %}
 9355   ins_encode %{
 9356     __ salq($dst$$Register, $shift$$constant);
 9357   %}
 9358   ins_pipe(ialu_reg);
 9359 %}
 9360 
 9361 // Shift Left by 8-bit immediate
 9362 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9363 %{
        // Matches StoreL dst (LShiftL (LoadL dst) shift): shifts a long in
        // memory left by an immI8 count with one salq on the memory operand.
        // cr is declared killed.
 9364   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 9365   effect(KILL cr);
 9366
 9367   format %{ "salq    $dst, $shift" %}
 9368   ins_encode %{
 9369     __ salq($dst$$Address, $shift$$constant);
 9370   %}
 9371   ins_pipe(ialu_mem_imm);
 9372 %}
 9373 
 9374 // Shift Left by variable
 9375 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9376 %{
        // Variable-count long left shift, selected only when
        // VM_Version::supports_bmi2() is false. The count operand is
        // constrained to the rcx_RegI class and the emitted salq takes dst
        // only. cr is declared killed.
 9377   predicate(!VM_Version::supports_bmi2());
 9378   match(Set dst (LShiftL dst shift));
 9379   effect(KILL cr);
 9380
 9381   format %{ "salq    $dst, $shift" %}
 9382   ins_encode %{
 9383     __ salq($dst$$Register);
 9384   %}
 9385   ins_pipe(ialu_reg_reg);
 9386 %}
 9387 
 9388 // Shift Left by variable
 9389 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9390 %{
        // Variable-count long left shift of a memory operand, selected only
        // when VM_Version::supports_bmi2() is false. Matches
        // StoreL dst (LShiftL (LoadL dst) shift); the count is constrained to
        // the rcx_RegI class and the emitted salq takes the address only.
 9391   predicate(!VM_Version::supports_bmi2());
 9392   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 9393   effect(KILL cr);
 9394
 9395   format %{ "salq    $dst, $shift" %}
 9396   ins_encode %{
 9397     __ salq($dst$$Address);
 9398   %}
 9399   ins_pipe(ialu_mem_reg);
 9400 %}
 9401 
 9402 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9403 %{
        // Variable-count long left shift, selected only when
        // VM_Version::supports_bmi2() is true. Emits the three-operand shlxq
        // with an int-class count register; no flags operand is declared.
 9404   predicate(VM_Version::supports_bmi2());
 9405   match(Set dst (LShiftL src shift));
 9406
 9407   format %{ "shlxq   $dst, $src, $shift" %}
 9408   ins_encode %{
 9409     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
 9410   %}
 9411   ins_pipe(ialu_reg_reg);
 9412 %}
 9413 
 9414 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9415 %{
        // Selected only when VM_Version::supports_bmi2() is true. Matches
        // LShiftL (LoadL src) shift and folds the load into shlxq with a
        // memory source; no flags operand is declared for this rule.
 9416   predicate(VM_Version::supports_bmi2());
 9417   match(Set dst (LShiftL (LoadL src) shift));
 9418   ins_cost(175);
 9419   format %{ "shlxq   $dst, $src, $shift" %}
 9420   ins_encode %{
 9421     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
 9422   %}
 9423   ins_pipe(ialu_reg_mem);
 9424 %}
 9425 
 9426 // Arithmetic Shift Right by immediate (immI operand; count masked to its low 6 bits)
 9427 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
 9428 %{
        // Matches RShiftL dst shift where shift is a general immI operand;
        // the constant is masked with 0x3F and narrowed to unsigned char
        // before being passed to sarq. cr is declared killed.
        // NOTE(review): this register form uses the ialu_mem_imm pipe class,
        // whereas the sibling shrL_rReg_imm uses ialu_reg - confirm whether
        // that is intentional.
 9429   match(Set dst (RShiftL dst shift));
 9430   effect(KILL cr);
 9431
 9432   format %{ "sarq    $dst, $shift" %}
 9433   ins_encode %{
 9434     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
 9435   %}
 9436   ins_pipe(ialu_mem_imm);
 9437 %}
 9438 
 9439 // Arithmetic Shift Right of memory by immediate (immI operand; count masked to its low 6 bits)
 9440 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
 9441 %{
        // Matches StoreL dst (RShiftL (LoadL dst) shift) where shift is a
        // general immI operand; the constant is masked with 0x3F and narrowed
        // to unsigned char before being passed to sarq on the memory operand.
        // cr is declared killed.
 9442   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 9443   effect(KILL cr);
 9444
 9445   format %{ "sarq    $dst, $shift" %}
 9446   ins_encode %{
 9447     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
 9448   %}
 9449   ins_pipe(ialu_mem_imm);
 9450 %}
 9451 
 9452 // Arithmetic Shift Right by variable
 9453 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9454 %{
        // Variable-count long arithmetic right shift, selected only when
        // VM_Version::supports_bmi2() is false. The count operand is
        // constrained to the rcx_RegI class and the emitted sarq takes dst
        // only. cr is declared killed.
 9455   predicate(!VM_Version::supports_bmi2());
 9456   match(Set dst (RShiftL dst shift));
 9457   effect(KILL cr);
 9458
 9459   format %{ "sarq    $dst, $shift" %}
 9460   ins_encode %{
 9461     __ sarq($dst$$Register);
 9462   %}
 9463   ins_pipe(ialu_reg_reg);
 9464 %}
 9465 
 9466 // Arithmetic Shift Right by variable
 9467 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9468 %{
        // Variable-count long arithmetic right shift of a memory operand,
        // selected only when VM_Version::supports_bmi2() is false. Matches
        // StoreL dst (RShiftL (LoadL dst) shift); the count is constrained to
        // the rcx_RegI class and the emitted sarq takes the address only.
 9469   predicate(!VM_Version::supports_bmi2());
 9470   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 9471   effect(KILL cr);
 9472
 9473   format %{ "sarq    $dst, $shift" %}
 9474   ins_encode %{
 9475     __ sarq($dst$$Address);
 9476   %}
 9477   ins_pipe(ialu_mem_reg);
 9478 %}
 9479 
 9480 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9481 %{
        // Variable-count long arithmetic right shift, selected only when
        // VM_Version::supports_bmi2() is true. Emits the three-operand sarxq
        // with an int-class count register; no flags operand is declared.
 9482   predicate(VM_Version::supports_bmi2());
 9483   match(Set dst (RShiftL src shift));
 9484
 9485   format %{ "sarxq   $dst, $src, $shift" %}
 9486   ins_encode %{
 9487     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
 9488   %}
 9489   ins_pipe(ialu_reg_reg);
 9490 %}
 9491 
 9492 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9493 %{
        // Selected only when VM_Version::supports_bmi2() is true. Matches
        // RShiftL (LoadL src) shift and folds the load into sarxq with a
        // memory source; no flags operand is declared for this rule.
 9494   predicate(VM_Version::supports_bmi2());
 9495   match(Set dst (RShiftL (LoadL src) shift));
 9496   ins_cost(175);
 9497   format %{ "sarxq   $dst, $src, $shift" %}
 9498   ins_encode %{
 9499     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
 9500   %}
 9501   ins_pipe(ialu_reg_mem);
 9502 %}
 9503 
 9504 // Logical Shift Right by 8-bit immediate
 9505 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 9506 %{
        // Matches URShiftL dst shift where shift is an immI8 operand; emits
        // shrq dst, shift in place. cr is declared killed.
 9507   match(Set dst (URShiftL dst shift));
 9508   effect(KILL cr);
 9509
 9510   format %{ "shrq    $dst, $shift" %}
 9511   ins_encode %{
 9512     __ shrq($dst$$Register, $shift$$constant);
 9513   %}
 9514   ins_pipe(ialu_reg);
 9515 %}
 9516 
 9517 // Logical Shift Right by 8-bit immediate
 9518 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9519 %{
        // Matches StoreL dst (URShiftL (LoadL dst) shift): logical right
        // shift of a long in memory by an immI8 count with one shrq on the
        // memory operand. cr is declared killed.
 9520   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 9521   effect(KILL cr);
 9522
 9523   format %{ "shrq    $dst, $shift" %}
 9524   ins_encode %{
 9525     __ shrq($dst$$Address, $shift$$constant);
 9526   %}
 9527   ins_pipe(ialu_mem_imm);
 9528 %}
 9529 
 9530 // Logical Shift Right by variable
 9531 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9532 %{
        // Variable-count long logical right shift, selected only when
        // VM_Version::supports_bmi2() is false. The count operand is
        // constrained to the rcx_RegI class and the emitted shrq takes dst
        // only. cr is declared killed.
 9533   predicate(!VM_Version::supports_bmi2());
 9534   match(Set dst (URShiftL dst shift));
 9535   effect(KILL cr);
 9536
 9537   format %{ "shrq    $dst, $shift" %}
 9538   ins_encode %{
 9539     __ shrq($dst$$Register);
 9540   %}
 9541   ins_pipe(ialu_reg_reg);
 9542 %}
 9543 
 9544 // Logical Shift Right by variable
 9545 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9546 %{
        // Variable-count long logical right shift of a memory operand,
        // selected only when VM_Version::supports_bmi2() is false. Matches
        // StoreL dst (URShiftL (LoadL dst) shift); the count is constrained
        // to the rcx_RegI class and the emitted shrq takes the address only.
 9547   predicate(!VM_Version::supports_bmi2());
 9548   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 9549   effect(KILL cr);
 9550
 9551   format %{ "shrq    $dst, $shift" %}
 9552   ins_encode %{
 9553     __ shrq($dst$$Address);
 9554   %}
 9555   ins_pipe(ialu_mem_reg);
 9556 %}
 9557 
 9558 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9559 %{
        // Variable-count long logical right shift, selected only when
        // VM_Version::supports_bmi2() is true. Emits the three-operand shrxq
        // with an int-class count register; no flags operand is declared.
 9560   predicate(VM_Version::supports_bmi2());
 9561   match(Set dst (URShiftL src shift));
 9562
 9563   format %{ "shrxq   $dst, $src, $shift" %}
 9564   ins_encode %{
 9565     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
 9566   %}
 9567   ins_pipe(ialu_reg_reg);
 9568 %}
 9569 
 9570 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9571 %{
        // Selected only when VM_Version::supports_bmi2() is true. Matches
        // URShiftL (LoadL src) shift and folds the load into shrxq with a
        // memory source; no flags operand is declared for this rule.
 9572   predicate(VM_Version::supports_bmi2());
 9573   match(Set dst (URShiftL (LoadL src) shift));
 9574   ins_cost(175);
 9575   format %{ "shrxq   $dst, $src, $shift" %}
 9576   ins_encode %{
 9577     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
 9578   %}
 9579   ins_pipe(ialu_reg_mem);
 9580 %}
 9581 
 9582 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 9583 // This idiom is used by the compiler for the i2b bytecode.
 9584 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
 9585 %{
        // Matches RShiftI (LShiftI src 24) 24, where both counts are the same
        // immI_24 operand, and replaces the shift pair with a single movsbl
        // from src into dst. No flags operand is declared for this rule.
 9586   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 9587
 9588   format %{ "movsbl  $dst, $src\t# i2b" %}
 9589   ins_encode %{
 9590     __ movsbl($dst$$Register, $src$$Register);
 9591   %}
 9592   ins_pipe(ialu_reg_reg);
 9593 %}
 9594 
 9595 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 9596 // This idiom is used by the compiler for the i2s bytecode.
 9597 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
 9598 %{
        // Matches RShiftI (LShiftI src 16) 16, where both counts are the same
        // immI_16 operand, and replaces the shift pair with a single movswl
        // from src into dst. No flags operand is declared for this rule.
 9599   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 9600
 9601   format %{ "movswl  $dst, $src\t# i2s" %}
 9602   ins_encode %{
 9603     __ movswl($dst$$Register, $src$$Register);
 9604   %}
 9605   ins_pipe(ialu_reg_reg);
 9606 %}
 9607 
 9608 // ROL/ROR instructions
 9609 
// Rotate left by constant (int), non-BMI2 form.
// Two-operand roll that rotates dst in place by an 8-bit immediate.
// Selected only when BMI2 is unavailable and the node's type is T_INT;
// the flags register is declared killed.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9622 
// Rotate left by constant (int), BMI2 form with separate src and dst.
// The rule is emitted as rorxl with a complemented count; "rolxl" in the
// format string is only the printed name for this rule.
instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    // Rotate left by k is expressed as rotate right by 32 - (k mod 32).
    // NOTE(review): yields 32 when (k & 31) == 0 -- confirm rorxl masks the count.
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9634 
// Rotate left by constant (int), BMI2 form with the LoadI folded into the
// rorxl memory operand. "rolxl" in the format string is only the printed
// name; the emitted instruction is rorxl with a complemented count.
instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft (LoadI src) shift));
  ins_cost(175);
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    // Rotate left by k is expressed as rotate right by 32 - (k mod 32).
    // NOTE(review): yields 32 when (k & 31) == 0 -- confirm rorxl masks the count.
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9647 
// Rotate Left by variable (int).
// In-place roll of dst; the count operand is restricted to the rcx_RegI class
// and is not passed to the encoder call. This rule has no BMI2 condition in
// its predicate; the flags register is declared killed.
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9660 
// Rotate Right by constant (int), non-BMI2 form.
// Two-operand rorl that rotates dst in place by an 8-bit immediate.
// Selected only when BMI2 is unavailable and the node's type is T_INT;
// the flags register is declared killed.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9673 
// Rotate Right by constant (int), BMI2 form.
// Three-operand rorxl with separate src and dst; the constant is passed
// through unchanged and no flags effect is declared.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9685 
// Rotate Right by constant (int), BMI2 form with the LoadI folded into the
// rorxl memory operand. Costed at 175.
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight (LoadI src) shift));
  ins_cost(175);
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9697 
// Rotate Right by variable (int).
// In-place rorl of dst; the count operand is restricted to the rcx_RegI class
// and is not passed to the encoder call. This rule has no BMI2 condition in
// its predicate; the flags register is declared killed.
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9710 
// Rotate Left by constant (long), non-BMI2 form.
// Two-operand rolq that rotates dst in place by an 8-bit immediate.
// Selected only when BMI2 is unavailable and the node's type is T_LONG;
// the flags register is declared killed.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9723 
// Rotate left by constant (long), BMI2 form with separate src and dst.
// The rule is emitted as rorxq with a complemented count; "rolxq" in the
// format string is only the printed name for this rule.
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    // Rotate left by k is expressed as rotate right by 64 - (k mod 64).
    // NOTE(review): yields 64 when (k & 63) == 0 -- confirm rorxq masks the count.
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9735 
// Rotate left by constant (long), BMI2 form with the LoadL folded into the
// rorxq memory operand. "rolxq" in the format string is only the printed
// name; the emitted instruction is rorxq with a complemented count.
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft (LoadL src) shift));
  ins_cost(175);
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    // Rotate left by k is expressed as rotate right by 64 - (k mod 64).
    // NOTE(review): yields 64 when (k & 63) == 0 -- confirm rorxq masks the count.
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9748 
// Rotate Left by variable (long).
// In-place rolq of dst; the count operand is restricted to the rcx_RegI class
// and is not passed to the encoder call. This rule has no BMI2 condition in
// its predicate; the flags register is declared killed.
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9761 
// Rotate Right by constant (long), non-BMI2 form.
// Two-operand rorq that rotates dst in place by an 8-bit immediate.
// Selected only when BMI2 is unavailable and the node's type is T_LONG;
// the flags register is declared killed.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9774 
// Rotate Right by constant (long), BMI2 form.
// Three-operand rorxq with separate src and dst; the constant is passed
// through unchanged and no flags effect is declared.
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9786 
// Rotate Right by constant (long), BMI2 form with the LoadL folded into the
// rorxq memory operand. Costed at 175.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight (LoadL src) shift));
  ins_cost(175);
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9798 
// Rotate Right by variable (long).
// In-place rorq of dst; the count operand is restricted to the rcx_RegI class
// and is not passed to the encoder call. This rule has no BMI2 condition in
// its predicate; the flags register is declared killed.
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9811 
 9812 //----------------------------- CompressBits/ExpandBits ------------------------
 9813 
// CompressBits on longs, register mask: emitted as pextq (parallel bit
// extract). The predicate only checks that the node's type is a long; no CPU
// feature test appears in this rule.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
 9823 
// ExpandBits on longs, register mask: emitted as pdepq (parallel bit
// deposit). The predicate only checks that the node's type is a long; no CPU
// feature test appears in this rule.
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
 9833 
// CompressBits on longs with the mask's LoadL folded into the pextq memory
// operand. The predicate only checks that the node's type is a long.
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src (LoadL mask)));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
 9843 
// ExpandBits on longs with the mask's LoadL folded into the pdepq memory
// operand. The predicate only checks that the node's type is a long.
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src (LoadL mask)));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
 9853 
 9854 
 9855 // Logical Instructions
 9856 
 9857 // Integer Logical Instructions
 9858 
 9859 // And Instructions
// And Register with Register (int).
// Two-operand andl: dst is both an input and the result. The flags register
// is declared killed.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9872 
// And Register with Immediate 255.
// The mask is implemented as a zero-extending byte move (movzbl), which
// allows separate src and dst registers. No flags effect is declared.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9884 
// And Register with Immediate 255 and promote to long.
// The ConvI2L is matched as part of the rule, so the single movzbl produces
// the long result and no separate conversion is emitted.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9896 
// And Register with Immediate 65535.
// The mask is implemented as a zero-extending 16-bit move (movzwl), which
// allows separate src and dst registers. No flags effect is declared.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9908 
// And Register with Immediate 65535 and promote to long.
// The ConvI2L is matched as part of the rule, so the single movzwl produces
// the long result and no separate conversion is emitted.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9920 
// Can skip int2long conversions after AND with small bitmask.
// For a mask operand of class immI_Pow2M1 the rule loads log2(mask + 1) into
// a temporary register and uses bzhiq with that register as the bit index.
// Requires BMI2; tmp is a TEMP and the flags register is declared killed.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi2());
  ins_cost(125);
  effect(TEMP tmp, KILL cr);
  match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
  ins_encode %{
    // tmp = number of low bits to keep.
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9935 
// And Register with Immediate (int).
// Two-operand andl of dst with a 32-bit constant; dst is updated in place
// and the flags register is declared killed.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9948 
// And Register with Memory (int).
// Folds the LoadI of the second input into the andl memory operand.
// Costed at 150; the flags register is declared killed.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9962 
// And Memory with Register (byte).
// Read-modify-write: matches a StoreB of (AndI (LoadB dst) src) back to the
// same address and emits a single andb on memory. Flags are declared killed.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andb    $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9976 
// And Memory with Register (int).
// Read-modify-write: matches a StoreI of (AndI (LoadI dst) src) back to the
// same address and emits a single andl on memory. Flags are declared killed.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9989 
// And Memory with Immediate (int).
// Read-modify-write: matches a StoreI of (AndI (LoadI dst) src) with a
// constant src and emits a single andl on memory. Costed at 125, lower than
// the register-source variant; flags are declared killed.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10003 
10004 // BMI1 instructions
// BMI1 andn (int), memory second source: matches (src1 ^ -1) & LoadI(src2),
// i.e. ~src1 & [src2]. Gated on UseBMI1Instructions; flags are declared
// killed.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10018 
// BMI1 andn (int), register sources: matches (src1 ^ -1) & src2, i.e.
// ~src1 & src2. Gated on UseBMI1Instructions; flags are declared killed.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10031 
// BMI1 blsi (int), register source: matches (0 - src) & src, which isolates
// the lowest set bit. Gated on UseBMI1Instructions; flags are declared killed.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10044 
// BMI1 blsi (int), memory source: matches (0 - LoadI(src)) & LoadI(src) with
// both loads naming the same memory operand, and emits one blsil on memory.
// Gated on UseBMI1Instructions; flags are declared killed.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10058 
// BMI1 blsmsk (int), memory source: matches (LoadI(src) + -1) ^ LoadI(src)
// with both loads naming the same memory operand, and emits one blsmskl on
// memory. Gated on UseBMI1Instructions; flags are declared killed.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10073 
// BMI1 blsmsk (int), register source: matches (src + -1) ^ src, the mask of
// bits up to and including the lowest set bit. Gated on UseBMI1Instructions;
// flags are declared killed.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
10088 
// BMI1 blsr (int), register source: matches (src + -1) & src, which clears
// the lowest set bit. Gated on UseBMI1Instructions; flags are declared killed.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // Register-only form: same pipeline class as the other reg-reg BMI1 rules
  // (blsil, blsmskl, andnl).
  ins_pipe(ialu_reg);
%}
10103 
// BMI1 blsr (int), memory source: matches (LoadI(src) + -1) & LoadI(src) with
// both loads naming the same memory operand, and emits one blsrl on memory.
// Gated on UseBMI1Instructions; flags are declared killed.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // Memory-source form: same pipeline class as the other reg-mem BMI1 rules
  // (blsil, blsmskl, andnl).
  ins_pipe(ialu_reg_mem);
%}
10119 
10120 // Or Instructions
// Or Register with Register (int).
// Two-operand orl: dst is both an input and the result. The flags register
// is declared killed.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10133 
// Or Register with Immediate (int).
// Two-operand orl of dst with a 32-bit constant; dst is updated in place and
// the flags register is declared killed.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10146 
// Or Register with Memory (int).
// Folds the LoadI of the second input into the orl memory operand.
// Costed at 150; the flags register is declared killed.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10160 
// Or Memory with Register (byte).
// Read-modify-write: matches a StoreB of (OrI (LoadB dst) src) back to the
// same address and emits a single orb on memory. Flags are declared killed.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10174 
// Or Memory with Register (int).
// Read-modify-write: matches a StoreI of (OrI (LoadI dst) src) back to the
// same address and emits a single orl on memory. Flags are declared killed.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10187 
// Or Memory with Immediate (int).
// Read-modify-write: matches a StoreI of (OrI (LoadI dst) src) with a
// constant src and emits a single orl on memory. Costed at 125; flags are
// declared killed.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10201 
10202 // Xor Instructions
// Xor Register with Register (int).
// Two-operand xorl: dst is both an input and the result. The flags register
// is declared killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10215 
// Xor Register with Immediate -1 (int).
// x ^ -1 is a bitwise complement, so the rule emits notl on dst in place.
// Unlike the general xor rules, no flags operand or KILL effect is declared.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  format %{ "notl   $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10226 
// Xor Register with Immediate (int).
// Two-operand xorl of dst with a 32-bit constant; dst is updated in place
// and the flags register is declared killed.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10239 
// Xor Register with Memory (int).
// Folds the LoadI of the second input into the xorl memory operand.
// Costed at 150; the flags register is declared killed.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10253 
// Xor Memory with Register (byte).
// Read-modify-write: matches a StoreB of (XorI (LoadB dst) src) back to the
// same address and emits a single xorb on memory. Flags are declared killed.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10267 
// Xor Memory with Register (int).
// Read-modify-write: matches a StoreI of (XorI (LoadI dst) src) back to the
// same address and emits a single xorl on memory. Flags are declared killed.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10280 
// Xor Memory with Immediate (int).
// Read-modify-write: matches a StoreI of (XorI (LoadI dst) src) with a
// constant src and emits a single xorl on memory. Costed at 125; flags are
// declared killed.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10294 
10295 
10296 // Long Logical Instructions
10297 
10298 // And Instructions
// And Register with Register (long).
// Two-operand andq: dst is both an input and the result. The flags register
// is declared killed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10311 
// And Register with Immediate 255 (long).
// The mask is implemented as a zero-extending byte move, which allows
// separate src and dst registers. No flags effect is declared.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // movzbl zeroes out the upper 32-bit and does not need REX.W
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10324 
// And Register with Immediate 65535 (long).
// The mask is implemented as a zero-extending 16-bit move, which allows
// separate src and dst registers. No flags effect is declared.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // movzwl zeroes out the upper 32-bit and does not need REX.W
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10337 
// And Register with Immediate (long).
// Two-operand andq of dst with a constant of operand class immL32; dst is
// updated in place and the flags register is declared killed.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10350 
// And Register with Memory (long).
// Folds the LoadL of the second input into the andq memory operand.
// Costed at 150; the flags register is declared killed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10364 
// And Memory with Register (long).
// Read-modify-write: matches a StoreL of (AndL (LoadL dst) src) back to the
// same address and emits a single andq on memory. Flags are declared killed.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10378 
// And Memory with Immediate (long).
// Read-modify-write: matches a StoreL of (AndL (LoadL dst) src) with an
// immL32 constant and emits a single andq on memory. Costed at 125; flags
// are declared killed.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10392 
// Clear a single bit of a long in memory.
// Matches the read-modify-write AND of memory with a constant whose
// complement is a power of two (operand class immL_NotPow2) and emits btrq
// with the index of that bit.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit immediate given that not(con) is a power of 2
  // because AND/OR works well enough for 8/32-bit values.
  // The rule is taken only when the cleared bit index is above 30.
  // NOTE(review): n->in(3)->in(2) is presumably the constant input of the
  // AndL feeding the store -- confirm against the StoreL input layout.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);

  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    // The bit index is log2 of the complemented mask.
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
10409 
10410 // BMI1 instructions
// BMI1 andn (long), memory second source: matches (src1 ^ -1) & LoadL(src2),
// i.e. ~src1 & [src2]. Gated on UseBMI1Instructions; flags are declared
// killed.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10424 
// BMI1 andn (long), register sources: matches (src1 ^ -1) & src2, i.e.
// ~src1 & src2. Gated on UseBMI1Instructions; flags are declared killed.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // Register-only form: same pipeline class as the int variant
  // andnI_rReg_rReg_rReg.
  ins_pipe(ialu_reg);
%}
10437 
// BMI1 blsi (long), register source: matches (0 - src) & src, which isolates
// the lowest set bit. Gated on UseBMI1Instructions; flags are declared killed.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10450 
// BMI1 blsi (long), memory source: matches (0 - LoadL(src)) & LoadL(src) with
// both loads naming the same memory operand, and emits one blsiq on memory.
// Gated on UseBMI1Instructions; flags are declared killed.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10464 
// BMI1 blsmsk (long), memory source: matches (LoadL(src) + -1) ^ LoadL(src)
// with both loads naming the same memory operand, and emits one blsmskq on
// memory. Gated on UseBMI1Instructions; flags are declared killed.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10479 
// BMI1 blsmsk (long), register source: matches (src + -1) ^ src, the mask of
// bits up to and including the lowest set bit. Gated on UseBMI1Instructions;
// flags are declared killed.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
10494 
// BMI1 blsr (long), register source: matches (src + -1) & src, which clears
// the lowest set bit. Gated on UseBMI1Instructions; flags are declared killed.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
10509 
// BMI1 blsr (long), memory source: matches (LoadL(src) + -1) & LoadL(src)
// with both loads naming the same memory operand, and emits one blsrq on
// memory. Gated on UseBMI1Instructions; flags are declared killed.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // Memory-source form: same pipeline class as the other reg-mem BMI1 rules
  // (blsiq, blsmskq, andnq).
  ins_pipe(ialu_reg_mem);
%}
10525 
10526 // Or Instructions
// Or Register with Register (long).
// Two-operand orq: dst is both an input and the result. The flags register
// is declared killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10539 
// Or a long register with a pointer reinterpreted as an integer (CastP2X).
// Use any_RegP to match R15 (TLS register) without spilling.
// The cast is folded into the rule, so only the orq is emitted; flags are
// declared killed.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10551 
10552 
// Or Register with Immediate (long).
// Two-operand orq of dst with a constant of operand class immL32; dst is
// updated in place and the flags register is declared killed.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10565 
// Or Register with Memory (long).
// Folds the LoadL of the second input into the orq memory operand.
// Costed at 150; the flags register is declared killed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10579 
// Or Memory with Register (long).
// Read-modify-write: matches a StoreL of (OrL (LoadL dst) src) back to the
// same address and emits a single orq on memory. Flags are declared killed.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10593 
// Or Memory with Immediate (long).
// Read-modify-write: matches a StoreL of (OrL (LoadL dst) src) with an immL32
// constant and emits a single orq on memory. Costed at 125; flags are
// declared killed.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10607 
// Set a single bit of a long in memory.
// Matches the read-modify-write OR of memory with a power-of-two constant
// (operand class immL_Pow2) and emits btsq with the index of that bit.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit power of 2 immediate
  // because AND/OR works well enough for 8/32-bit values.
  // The rule is taken only when the set bit index is above 31.
  // NOTE(review): n->in(3)->in(2) is presumably the constant input of the
  // OrL feeding the store -- confirm against the StoreL input layout.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);

  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    // The bit index is log2 of the mask.
    __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
10624 
10625 // Xor Instructions
// Xor Register with Register (long).
// Two-operand xorq: dst is both an input and the result. The flags register
// is declared killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10638 
10639 // Xor Register with Immediate -1
// (XorL dst imm) with an immL_M1 operand is emitted as notq dst.
// Unlike the other XorL rules here, this one has no flags operand and
// declares no KILL effect.
10640 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10641   match(Set dst (XorL dst imm));
10642 
10643   format %{ "notq   $dst" %}
10644   ins_encode %{
10645      __ notq($dst$$Register);
10646   %}
10647   ins_pipe(ialu_reg);
10648 %}
10649 
10650 // Xor Register with Immediate
// Long XOR of a register with an immL32 constant: matches (XorL dst src)
// and emits xorq dst, imm using $src$$constant. The flags register cr is
// declared killed.
10651 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10652 %{
10653   match(Set dst (XorL dst src));
10654   effect(KILL cr);
10655 
10656   format %{ "xorq    $dst, $src\t# long" %}
10657   ins_encode %{
10658     __ xorq($dst$$Register, $src$$constant);
10659   %}
10660   ins_pipe(ialu_reg);
10661 %}
10662 
10663 // Xor Register with Memory
// Long XOR, register with memory: matches (XorL dst (LoadL src)) so the
// load is folded into a single xorq reg, mem. The flags register cr is
// declared killed.
10664 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10665 %{
10666   match(Set dst (XorL dst (LoadL src)));
10667   effect(KILL cr);
10668 
10669   ins_cost(150);
10670   format %{ "xorq    $dst, $src\t# long" %}
10671   ins_encode %{
10672     __ xorq($dst$$Register, $src$$Address);
10673   %}
10674   ins_pipe(ialu_reg_mem);
10675 %}
10676 
10677 // Xor Memory with Register
// Read-modify-write form: matches a StoreL to dst whose value is
// (XorL (LoadL dst) src) and emits one xorq mem, reg on that address.
// The flags register cr is declared killed.
10678 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10679 %{
10680   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10681   effect(KILL cr);
10682 
10683   ins_cost(150);
10684   format %{ "xorq    $dst, $src\t# long" %}
10685   ins_encode %{
10686     __ xorq($dst$$Address, $src$$Register);
10687   %}
10688   ins_pipe(ialu_mem_reg);
10689 %}
10690 
10691 // Xor Memory with Immediate
// Read-modify-write form with a constant: matches a StoreL to dst of
// (XorL (LoadL dst) src) where src is an immL32 operand, and emits
// xorq mem, imm using $src$$constant. The flags register cr is declared killed.
10692 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10693 %{
10694   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10695   effect(KILL cr);
10696 
10697   ins_cost(125);
10698   format %{ "xorq    $dst, $src\t# long" %}
10699   ins_encode %{
10700     __ xorq($dst$$Address, $src$$constant);
10701   %}
10702   ins_pipe(ialu_mem_imm);
10703 %}
10704 
// General CmpLTMask: materializes the result of (CmpLTMask p q) in dst with
// a four-instruction sequence (compare, set-on-less, zero-extend, negate).
// The flags register cr is declared killed; cost is 400.
10705 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10706 %{
10707   match(Set dst (CmpLTMask p q));
10708   effect(KILL cr);
10709 
10710   ins_cost(400);
10711   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10712             "setlt   $dst\n\t"
10713             "movzbl  $dst, $dst\n\t"
10714             "negl    $dst" %}
10715   ins_encode %{
10716     __ cmpl($p$$Register, $q$$Register);
          // Byte result of the "less" condition, then widened and negated below.
10717     __ setb(Assembler::less, $dst$$Register);
10718     __ movzbl($dst$$Register, $dst$$Register);
10719     __ negl($dst$$Register);
10720   %}
10721   ins_pipe(pipe_slow);
10722 %}
10723 
// CmpLTMask against an immI_0 operand: (CmpLTMask dst zero) is emitted as a
// single in-place sarl dst, 31. The flags register cr is declared killed;
// cost is 100.
10724 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
10725 %{
10726   match(Set dst (CmpLTMask dst zero));
10727   effect(KILL cr);
10728 
10729   ins_cost(100);
10730   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10731   ins_encode %{
10732     __ sarl($dst$$Register, 31);
10733   %}
10734   ins_pipe(ialu_reg);
10735 %}
10736 
10737 /* Better to save a register than avoid a branch */
// Fused pattern p = ((CmpLTMask p q) & y) + (p - q). Instead of building the
// mask, it subtracts q from p and conditionally adds y, skipping the add
// with a short branch when the subtraction's flags say greaterEqual.
// The flags register cr is declared killed; cost is 300.
10738 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10739 %{
10740   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10741   effect(KILL cr);
10742   ins_cost(300);
10743   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
10744             "jge     done\n\t"
10745             "addl    $p,$y\n"
10746             "done:   " %}
10747   ins_encode %{
10748     Register Rp = $p$$Register;
10749     Register Rq = $q$$Register;
10750     Register Ry = $y$$Register;
10751     Label done;
10752     __ subl(Rp, Rq);
          // Short-form jump over the add when p >= q.
10753     __ jccb(Assembler::greaterEqual, done);
10754     __ addl(Rp, Ry);
10755     __ bind(done);
10756   %}
10757   ins_pipe(pipe_cmplt);
10758 %}
10759 
10760 /* Better to save a register than avoid a branch */
// Fused pattern y = (CmpLTMask p q) & y. Compares p with q and keeps y
// unchanged when the "less" condition holds; otherwise y is cleared with
// xorl y, y. The flags register cr is declared killed; cost is 300.
10761 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10762 %{
10763   match(Set y (AndI (CmpLTMask p q) y));
10764   effect(KILL cr);
10765 
10766   ins_cost(300);
10767 
10768   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
10769             "jlt     done\n\t"
10770             "xorl    $y, $y\n"
10771             "done:   " %}
10772   ins_encode %{
10773     Register Rp = $p$$Register;
10774     Register Rq = $q$$Register;
10775     Register Ry = $y$$Register;
10776     Label done;
10777     __ cmpl(Rp, Rq);
          // Short-form jump over the clear when p < q.
10778     __ jccb(Assembler::less, done);
10779     __ xorl(Ry, Ry);
10780     __ bind(done);
10781   %}
10782   ins_pipe(pipe_cmplt);
10783 %}
10784 
10785 
10786 //---------- FP Instructions------------------------------------------------
10787 
10788 // Really expensive, avoid
// Float compare into the rFlagsRegU flags class: matches (CmpF src1 src2),
// emits ucomiss and then calls emit_cmpfp_fixup(_masm). That helper is
// defined outside this chunk; the format string shows the sequence it is
// expected to produce (a flags fixup taken only on the parity/NaN path).
// Cost is 500, versus 100 for the rFlagsRegUCF variants.
10789 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10790 %{
10791   match(Set cr (CmpF src1 src2));
10792 
10793   ins_cost(500);
10794   format %{ "ucomiss $src1, $src2\n\t"
10795             "jnp,s   exit\n\t"
10796             "pushfq\t# saw NaN, set CF\n\t"
10797             "andq    [rsp], #0xffffff2b\n\t"
10798             "popfq\n"
10799     "exit:" %}
10800   ins_encode %{
10801     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10802     emit_cmpfp_fixup(_masm);
10803   %}
10804   ins_pipe(pipe_slow);
10805 %}
10806 
// Float compare into the rFlagsRegUCF flags class: matches
// (CmpF src1 src2) and emits only ucomiss, with no fixup sequence.
// Cost is 100.
10807 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10808   match(Set cr (CmpF src1 src2));
10809 
10810   ins_cost(100);
10811   format %{ "ucomiss $src1, $src2" %}
10812   ins_encode %{
10813     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10814   %}
10815   ins_pipe(pipe_slow);
10816 %}
10817 
// Float compare into rFlagsRegUCF with the second operand loaded from
// memory: matches (CmpF src1 (LoadF src2)) and emits ucomiss xmm, mem.
// Cost is 100.
10818 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10819   match(Set cr (CmpF src1 (LoadF src2)));
10820 
10821   ins_cost(100);
10822   format %{ "ucomiss $src1, $src2" %}
10823   ins_encode %{
10824     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10825   %}
10826   ins_pipe(pipe_slow);
10827 %}
10828 
// Float compare into rFlagsRegUCF against an immF constant: matches
// (CmpF src con) and emits ucomiss with the constant addressed through
// $constantaddress($con) (the constant table, per the format string).
// Cost is 100.
10829 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
10830   match(Set cr (CmpF src con));
10831   ins_cost(100);
10832   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10833   ins_encode %{
10834     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10835   %}
10836   ins_pipe(pipe_slow);
10837 %}
10838 
10839 // Really expensive, avoid
// Double compare into the rFlagsRegU flags class: matches (CmpD src1 src2),
// emits ucomisd and then calls emit_cmpfp_fixup(_masm). That helper is
// defined outside this chunk; the format string shows the sequence it is
// expected to produce (a flags fixup taken only on the parity/NaN path).
// Cost is 500, versus 100 for the rFlagsRegUCF variants.
10840 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10841 %{
10842   match(Set cr (CmpD src1 src2));
10843 
10844   ins_cost(500);
10845   format %{ "ucomisd $src1, $src2\n\t"
10846             "jnp,s   exit\n\t"
10847             "pushfq\t# saw NaN, set CF\n\t"
10848             "andq    [rsp], #0xffffff2b\n\t"
10849             "popfq\n"
10850     "exit:" %}
10851   ins_encode %{
10852     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10853     emit_cmpfp_fixup(_masm);
10854   %}
10855   ins_pipe(pipe_slow);
10856 %}
10857 
// Double compare into the rFlagsRegUCF flags class: matches
// (CmpD src1 src2) and emits only ucomisd, with no fixup sequence.
// Cost is 100. The format string mirrors cmpF_cc_reg_CF and cmpD_cc_memCF
// (a stray trailing " test" token was removed from the listing text).
10858 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10859   match(Set cr (CmpD src1 src2));
10860 
10861   ins_cost(100);
10862   format %{ "ucomisd $src1, $src2" %}
10863   ins_encode %{
10864     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10865   %}
10866   ins_pipe(pipe_slow);
10867 %}
10868 
// Double compare into rFlagsRegUCF with the second operand loaded from
// memory: matches (CmpD src1 (LoadD src2)) and emits ucomisd xmm, mem.
// Cost is 100.
10869 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10870   match(Set cr (CmpD src1 (LoadD src2)));
10871 
10872   ins_cost(100);
10873   format %{ "ucomisd $src1, $src2" %}
10874   ins_encode %{
10875     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10876   %}
10877   ins_pipe(pipe_slow);
10878 %}
10879 
// Double compare into rFlagsRegUCF against an immD constant: matches
// (CmpD src con) and emits ucomisd with the constant addressed through
// $constantaddress($con) (the constant table, per the format string).
// Cost is 100.
10880 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
10881   match(Set cr (CmpD src con));
10882   ins_cost(100);
10883   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10884   ins_encode %{
10885     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10886   %}
10887   ins_pipe(pipe_slow);
10888 %}
10889 
10890 // Compare into -1,0,1
// Three-way float compare of two registers: matches (CmpF3 src1 src2) and
// produces an int in dst. Emits ucomiss followed by emit_cmpfp3(_masm, dst);
// that helper is defined outside this chunk, and the format string shows the
// sequence it is expected to emit. The flags register cr is declared killed.
10891 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10892 %{
10893   match(Set dst (CmpF3 src1 src2));
10894   effect(KILL cr);
10895 
10896   ins_cost(275);
10897   format %{ "ucomiss $src1, $src2\n\t"
10898             "movl    $dst, #-1\n\t"
10899             "jp,s    done\n\t"
10900             "jb,s    done\n\t"
10901             "setne   $dst\n\t"
10902             "movzbl  $dst, $dst\n"
10903     "done:" %}
10904   ins_encode %{
10905     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10906     emit_cmpfp3(_masm, $dst$$Register);
10907   %}
10908   ins_pipe(pipe_slow);
10909 %}
10910 
10911 // Compare into -1,0,1
// Three-way float compare with the second operand loaded from memory:
// matches (CmpF3 src1 (LoadF src2)) and produces an int in dst. Emits
// ucomiss xmm, mem followed by emit_cmpfp3(_masm, dst) (helper defined
// outside this chunk). The flags register cr is declared killed.
10912 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10913 %{
10914   match(Set dst (CmpF3 src1 (LoadF src2)));
10915   effect(KILL cr);
10916 
10917   ins_cost(275);
10918   format %{ "ucomiss $src1, $src2\n\t"
10919             "movl    $dst, #-1\n\t"
10920             "jp,s    done\n\t"
10921             "jb,s    done\n\t"
10922             "setne   $dst\n\t"
10923             "movzbl  $dst, $dst\n"
10924     "done:" %}
10925   ins_encode %{
10926     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10927     emit_cmpfp3(_masm, $dst$$Register);
10928   %}
10929   ins_pipe(pipe_slow);
10930 %}
10931 
10932 // Compare into -1,0,1
// Three-way float compare against an immF constant: matches
// (CmpF3 src con) and produces an int in dst. The constant is addressed
// through $constantaddress($con); ucomiss is followed by
// emit_cmpfp3(_masm, dst) (helper defined outside this chunk).
// The flags register cr is declared killed.
10933 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
10934   match(Set dst (CmpF3 src con));
10935   effect(KILL cr);
10936 
10937   ins_cost(275);
10938   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10939             "movl    $dst, #-1\n\t"
10940             "jp,s    done\n\t"
10941             "jb,s    done\n\t"
10942             "setne   $dst\n\t"
10943             "movzbl  $dst, $dst\n"
10944     "done:" %}
10945   ins_encode %{
10946     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10947     emit_cmpfp3(_masm, $dst$$Register);
10948   %}
10949   ins_pipe(pipe_slow);
10950 %}
10951 
10952 // Compare into -1,0,1
// Three-way double compare of two registers: matches (CmpD3 src1 src2) and
// produces an int in dst. Emits ucomisd followed by emit_cmpfp3(_masm, dst);
// that helper is defined outside this chunk, and the format string shows the
// sequence it is expected to emit. The flags register cr is declared killed.
10953 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10954 %{
10955   match(Set dst (CmpD3 src1 src2));
10956   effect(KILL cr);
10957 
10958   ins_cost(275);
10959   format %{ "ucomisd $src1, $src2\n\t"
10960             "movl    $dst, #-1\n\t"
10961             "jp,s    done\n\t"
10962             "jb,s    done\n\t"
10963             "setne   $dst\n\t"
10964             "movzbl  $dst, $dst\n"
10965     "done:" %}
10966   ins_encode %{
10967     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10968     emit_cmpfp3(_masm, $dst$$Register);
10969   %}
10970   ins_pipe(pipe_slow);
10971 %}
10972 
10973 // Compare into -1,0,1
// Three-way double compare with the second operand loaded from memory:
// matches (CmpD3 src1 (LoadD src2)) and produces an int in dst. Emits
// ucomisd xmm, mem followed by emit_cmpfp3(_masm, dst) (helper defined
// outside this chunk). The flags register cr is declared killed.
10974 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10975 %{
10976   match(Set dst (CmpD3 src1 (LoadD src2)));
10977   effect(KILL cr);
10978 
10979   ins_cost(275);
10980   format %{ "ucomisd $src1, $src2\n\t"
10981             "movl    $dst, #-1\n\t"
10982             "jp,s    done\n\t"
10983             "jb,s    done\n\t"
10984             "setne   $dst\n\t"
10985             "movzbl  $dst, $dst\n"
10986     "done:" %}
10987   ins_encode %{
10988     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10989     emit_cmpfp3(_masm, $dst$$Register);
10990   %}
10991   ins_pipe(pipe_slow);
10992 %}
10993 
10994 // Compare into -1,0,1
// Three-way double compare against an immD constant: matches
// (CmpD3 src con) and produces an int in dst. The constant is addressed
// through $constantaddress($con); ucomisd is followed by
// emit_cmpfp3(_masm, dst) (helper defined outside this chunk).
// The flags register cr is declared killed.
10995 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
10996   match(Set dst (CmpD3 src con));
10997   effect(KILL cr);
10998 
10999   ins_cost(275);
11000   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
11001             "movl    $dst, #-1\n\t"
11002             "jp,s    done\n\t"
11003             "jb,s    done\n\t"
11004             "setne   $dst\n\t"
11005             "movzbl  $dst, $dst\n"
11006     "done:" %}
11007   ins_encode %{
11008     __ ucomisd($src$$XMMRegister, $constantaddress($con));
11009     emit_cmpfp3(_masm, $dst$$Register);
11010   %}
11011   ins_pipe(pipe_slow);
11012 %}
11013 
11014 //----------Arithmetic Conversion Instructions---------------------------------
11015 
// Float to double, register to register: matches (ConvF2D src) and emits
// cvtss2sd dst, src. No flags effect is declared.
11016 instruct convF2D_reg_reg(regD dst, regF src)
11017 %{
11018   match(Set dst (ConvF2D src));
11019 
11020   format %{ "cvtss2sd $dst, $src" %}
11021   ins_encode %{
11022     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
11023   %}
11024   ins_pipe(pipe_slow); // XXX
11025 %}
11026 
// Float to double with the source loaded from memory: matches
// (ConvF2D (LoadF src)) and emits cvtss2sd dst, mem, folding the load.
11027 instruct convF2D_reg_mem(regD dst, memory src)
11028 %{
11029   match(Set dst (ConvF2D (LoadF src)));
11030 
11031   format %{ "cvtss2sd $dst, $src" %}
11032   ins_encode %{
11033     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
11034   %}
11035   ins_pipe(pipe_slow); // XXX
11036 %}
11037 
// Double to float, register to register: matches (ConvD2F src) and emits
// cvtsd2ss dst, src. No flags effect is declared.
11038 instruct convD2F_reg_reg(regF dst, regD src)
11039 %{
11040   match(Set dst (ConvD2F src));
11041 
11042   format %{ "cvtsd2ss $dst, $src" %}
11043   ins_encode %{
11044     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
11045   %}
11046   ins_pipe(pipe_slow); // XXX
11047 %}
11048 
// Double to float with the source loaded from memory: matches
// (ConvD2F (LoadD src)) and emits cvtsd2ss dst, mem, folding the load.
11049 instruct convD2F_reg_mem(regF dst, memory src)
11050 %{
11051   match(Set dst (ConvD2F (LoadD src)));
11052 
11053   format %{ "cvtsd2ss $dst, $src" %}
11054   ins_encode %{
11055     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
11056   %}
11057   ins_pipe(pipe_slow); // XXX
11058 %}
11059 
11060 // XXX do mem variants
// Float to int: matches (ConvF2I src) and delegates all code generation to
// the macro-assembler helper convertF2I with (T_INT, T_FLOAT). The helper is
// defined outside this chunk, so the emitted sequence is not visible here;
// the format string is only a symbolic placeholder. cr is declared killed.
11061 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11062 %{
11063   match(Set dst (ConvF2I src));
11064   effect(KILL cr);
11065   format %{ "convert_f2i $dst, $src" %}
11066   ins_encode %{
11067     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
11068   %}
11069   ins_pipe(pipe_slow);
11070 %}
11071 
// Float to long: matches (ConvF2L src) and delegates code generation to the
// macro-assembler helper convertF2I with (T_LONG, T_FLOAT) (helper defined
// outside this chunk). The flags register cr is declared killed.
11072 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11073 %{
11074   match(Set dst (ConvF2L src));
11075   effect(KILL cr);
11076   format %{ "convert_f2l $dst, $src"%}
11077   ins_encode %{
11078     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
11079   %}
11080   ins_pipe(pipe_slow);
11081 %}
11082 
// Double to int: matches (ConvD2I src) and delegates code generation to the
// macro-assembler helper convertF2I with (T_INT, T_DOUBLE) (helper defined
// outside this chunk). The flags register cr is declared killed.
11083 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11084 %{
11085   match(Set dst (ConvD2I src));
11086   effect(KILL cr);
11087   format %{ "convert_d2i $dst, $src"%}
11088   ins_encode %{
11089     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
11090   %}
11091   ins_pipe(pipe_slow);
11092 %}
11093 
// Double to long: matches (ConvD2L src) and delegates code generation to the
// macro-assembler helper convertF2I with (T_LONG, T_DOUBLE) (helper defined
// outside this chunk). The flags register cr is declared killed.
11094 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11095 %{
11096   match(Set dst (ConvD2L src));
11097   effect(KILL cr);
11098   format %{ "convert_d2l $dst, $src"%}
11099   ins_encode %{
11100     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
11101   %}
11102   ins_pipe(pipe_slow);
11103 %}
11104 
// RoundD: matches (RoundD src) producing a long in dst, implemented by the
// macro-assembler helper round_double (defined outside this chunk).
// dst, rtmp and the rcx_RegL operand are all declared TEMP, and cr is killed.
11105 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
11106 %{
11107   match(Set dst (RoundD src));
11108   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
11109   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
11110   ins_encode %{
11111     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
11112   %}
11113   ins_pipe(pipe_slow);
11114 %}
11115 
// RoundF: matches (RoundF src) producing an int in dst, implemented by the
// macro-assembler helper round_float (defined outside this chunk).
// dst, rtmp and the rcx_RegL operand are all declared TEMP, and cr is killed.
// Unlike round_double_reg, the format string does not list the temporaries.
11116 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
11117 %{
11118   match(Set dst (RoundF src));
11119   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
11120   format %{ "round_float $dst,$src" %}
11121   ins_encode %{
11122     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
11123   %}
11124   ins_pipe(pipe_slow);
11125 %}
11126 
// Int to float from a general register: matches (ConvI2F src) and emits
// cvtsi2ssl. Only selected when UseXmmI2F is false; convXI2F_reg handles
// the same ideal node when the flag is true.
11127 instruct convI2F_reg_reg(regF dst, rRegI src)
11128 %{
11129   predicate(!UseXmmI2F);
11130   match(Set dst (ConvI2F src));
11131 
11132   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11133   ins_encode %{
11134     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11135   %}
11136   ins_pipe(pipe_slow); // XXX
11137 %}
11138 
// Int to float with the source loaded from memory: matches
// (ConvI2F (LoadI src)) and emits cvtsi2ssl dst, mem. This rule carries no
// predicate (it does not depend on UseXmmI2F).
11139 instruct convI2F_reg_mem(regF dst, memory src)
11140 %{
11141   match(Set dst (ConvI2F (LoadI src)));
11142 
11143   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11144   ins_encode %{
11145     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
11146   %}
11147   ins_pipe(pipe_slow); // XXX
11148 %}
11149 
// Int to double from a general register: matches (ConvI2D src) and emits
// cvtsi2sdl. Only selected when UseXmmI2D is false; convXI2D_reg handles
// the same ideal node when the flag is true.
11150 instruct convI2D_reg_reg(regD dst, rRegI src)
11151 %{
11152   predicate(!UseXmmI2D);
11153   match(Set dst (ConvI2D src));
11154 
11155   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11156   ins_encode %{
11157     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11158   %}
11159   ins_pipe(pipe_slow); // XXX
11160 %}
11161 
// Int to double with the source loaded from memory: matches
// (ConvI2D (LoadI src)) and emits cvtsi2sdl dst, mem. This rule carries no
// predicate (it does not depend on UseXmmI2D).
11162 instruct convI2D_reg_mem(regD dst, memory src)
11163 %{
11164   match(Set dst (ConvI2D (LoadI src)));
11165 
11166   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11167   ins_encode %{
11168     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
11169   %}
11170   ins_pipe(pipe_slow); // XXX
11171 %}
11172 
// Alternative int to float used when UseXmmI2F is true: matches
// (ConvI2F src), first moves the int into the XMM destination with movdl,
// then converts in place with cvtdq2ps.
11173 instruct convXI2F_reg(regF dst, rRegI src)
11174 %{
11175   predicate(UseXmmI2F);
11176   match(Set dst (ConvI2F src));
11177 
11178   format %{ "movdl $dst, $src\n\t"
11179             "cvtdq2psl $dst, $dst\t# i2f" %}
11180   ins_encode %{
11181     __ movdl($dst$$XMMRegister, $src$$Register);
11182     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11183   %}
11184   ins_pipe(pipe_slow); // XXX
11185 %}
11186 
// Alternative int to double used when UseXmmI2D is true: matches
// (ConvI2D src), first moves the int into the XMM destination with movdl,
// then converts in place with cvtdq2pd.
11187 instruct convXI2D_reg(regD dst, rRegI src)
11188 %{
11189   predicate(UseXmmI2D);
11190   match(Set dst (ConvI2D src));
11191 
11192   format %{ "movdl $dst, $src\n\t"
11193             "cvtdq2pdl $dst, $dst\t# i2d" %}
11194   ins_encode %{
11195     __ movdl($dst$$XMMRegister, $src$$Register);
11196     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11197   %}
11198   ins_pipe(pipe_slow); // XXX
11199 %}
11200 
// Long to float from a general register: matches (ConvL2F src) and emits
// cvtsi2ssq dst, src. No predicate and no flags effect are declared.
11201 instruct convL2F_reg_reg(regF dst, rRegL src)
11202 %{
11203   match(Set dst (ConvL2F src));
11204 
11205   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11206   ins_encode %{
11207     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
11208   %}
11209   ins_pipe(pipe_slow); // XXX
11210 %}
11211 
// Long to float with the source loaded from memory: matches
// (ConvL2F (LoadL src)) and emits cvtsi2ssq dst, mem, folding the load.
11212 instruct convL2F_reg_mem(regF dst, memory src)
11213 %{
11214   match(Set dst (ConvL2F (LoadL src)));
11215 
11216   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11217   ins_encode %{
11218     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
11219   %}
11220   ins_pipe(pipe_slow); // XXX
11221 %}
11222 
// Long to double from a general register: matches (ConvL2D src) and emits
// cvtsi2sdq dst, src. No predicate and no flags effect are declared.
11223 instruct convL2D_reg_reg(regD dst, rRegL src)
11224 %{
11225   match(Set dst (ConvL2D src));
11226 
11227   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11228   ins_encode %{
11229     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
11230   %}
11231   ins_pipe(pipe_slow); // XXX
11232 %}
11233 
// Long to double with the source loaded from memory: matches
// (ConvL2D (LoadL src)) and emits cvtsi2sdq dst, mem, folding the load.
11234 instruct convL2D_reg_mem(regD dst, memory src)
11235 %{
11236   match(Set dst (ConvL2D (LoadL src)));
11237 
11238   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11239   ins_encode %{
11240     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
11241   %}
11242   ins_pipe(pipe_slow); // XXX
11243 %}
11244 
// Sign-extending int to long: matches (ConvI2L src) and emits
// movslq dst, src. Cost is 125; no flags effect is declared.
11245 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11246 %{
11247   match(Set dst (ConvI2L src));
11248 
11249   ins_cost(125);
11250   format %{ "movslq  $dst, $src\t# i2l" %}
11251   ins_encode %{
11252     __ movslq($dst$$Register, $src$$Register);
11253   %}
11254   ins_pipe(ialu_reg_reg);
11255 %}
11256 
11257 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11258 // %{
11259 //   match(Set dst (ConvI2L src));
11260 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11261 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11262 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11263 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11264 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11265 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11266 
11267 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11268 //   ins_encode(enc_copy(dst, src));
11269 // //   opcode(0x63); // needs REX.W
11270 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11271 //   ins_pipe(ialu_reg_reg);
11272 // %}
11273 
11274 // Zero-extend convert int to long
// Matches (AndL (ConvI2L src) mask) where mask is an immL_32bits operand,
// i.e. an int widened to long and then masked, and replaces the pair with a
// single 32-bit movl dst, src. When dst and src were assigned the same
// register nothing is emitted at all.
// NOTE(review): skipping the move presumably relies on the upper 32 bits of
// a register holding an int already being zero — confirm.
// The format string no longer ends in a dangling "\n\t", which only added an
// empty continuation line to the printed listing.
11275 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11276 %{
11277   match(Set dst (AndL (ConvI2L src) mask));
11278 
11279   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
11280   ins_encode %{
11281     if ($dst$$reg != $src$$reg) {
11282       __ movl($dst$$Register, $src$$Register);
11283     }
11284   %}
11285   ins_pipe(ialu_reg_reg);
11286 %}
11287 
11288 // Zero-extend convert int to long
// Matches (AndL (ConvI2L (LoadI src)) mask) where mask is an immL_32bits
// operand, i.e. an int loaded from memory, widened to long and masked, and
// replaces the whole subtree with a single 32-bit movl dst, mem.
// The format string no longer ends in a dangling "\n\t", which only added an
// empty continuation line to the printed listing.
11289 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11290 %{
11291   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11292 
11293   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
11294   ins_encode %{
11295     __ movl($dst$$Register, $src$$Address);
11296   %}
11297   ins_pipe(ialu_reg_mem);
11298 %}
11299 
// Masking a long with an immL_32bits operand: matches (AndL src mask) and
// emits a 32-bit movl dst, src instead of an AND. Unlike
// convI2L_reg_reg_zex, the move is always emitted, even if dst == src.
11300 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11301 %{
11302   match(Set dst (AndL src mask));
11303 
11304   format %{ "movl    $dst, $src\t# zero-extend long" %}
11305   ins_encode %{
11306     __ movl($dst$$Register, $src$$Register);
11307   %}
11308   ins_pipe(ialu_reg_reg);
11309 %}
11310 
// Long to int: matches (ConvL2I src) and emits a 32-bit movl dst, src.
// No cost, predicate or flags effect is declared.
11311 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11312 %{
11313   match(Set dst (ConvL2I src));
11314 
11315   format %{ "movl    $dst, $src\t# l2i" %}
11316   ins_encode %{
11317     __ movl($dst$$Register, $src$$Register);
11318   %}
11319   ins_pipe(ialu_reg_reg);
11320 %}
11321 
11322 
// MoveF2I with the float source in a stack slot: loads the 32-bit value
// into an int register with movl from [rsp + $src$$disp]. Cost is 125.
11323 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11324   match(Set dst (MoveF2I src));
11325   effect(DEF dst, USE src);
11326 
11327   ins_cost(125);
11328   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11329   ins_encode %{
11330     __ movl($dst$$Register, Address(rsp, $src$$disp));
11331   %}
11332   ins_pipe(ialu_reg_mem);
11333 %}
11334 
// MoveI2F with the int source in a stack slot: loads the value into an XMM
// register via the movflt helper from [rsp + $src$$disp]. Cost is 125.
11335 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11336   match(Set dst (MoveI2F src));
11337   effect(DEF dst, USE src);
11338 
11339   ins_cost(125);
11340   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11341   ins_encode %{
11342     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11343   %}
11344   ins_pipe(pipe_slow);
11345 %}
11346 
// MoveD2L with the double source in a stack slot: loads the 64-bit value
// into a long register with movq from [rsp + $src$$disp]. Cost is 125.
11347 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11348   match(Set dst (MoveD2L src));
11349   effect(DEF dst, USE src);
11350 
11351   ins_cost(125);
11352   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11353   ins_encode %{
11354     __ movq($dst$$Register, Address(rsp, $src$$disp));
11355   %}
11356   ins_pipe(ialu_reg_mem);
11357 %}
11358 
// MoveL2D with the long source in a stack slot, selected when
// UseXmmLoadAndClearUpper is false. The encoding is the same movdbl call as
// MoveL2D_stack_reg; only the predicate and the mnemonic printed by the
// format string (movlpd here) differ between the two rules.
// NOTE(review): the actual instruction is chosen inside movdbl, which is
// defined outside this chunk — presumably keyed on the same flag; confirm.
11359 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11360   predicate(!UseXmmLoadAndClearUpper);
11361   match(Set dst (MoveL2D src));
11362   effect(DEF dst, USE src);
11363 
11364   ins_cost(125);
11365   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11366   ins_encode %{
11367     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11368   %}
11369   ins_pipe(pipe_slow);
11370 %}
11371 
// MoveL2D with the long source in a stack slot, selected when
// UseXmmLoadAndClearUpper is true. Loads the value into an XMM register via
// the movdbl helper from [rsp + $src$$disp]; the format string prints movsd.
11372 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11373   predicate(UseXmmLoadAndClearUpper);
11374   match(Set dst (MoveL2D src));
11375   effect(DEF dst, USE src);
11376 
11377   ins_cost(125);
11378   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11379   ins_encode %{
11380     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11381   %}
11382   ins_pipe(pipe_slow);
11383 %}
11384 
11385 
// MoveF2I with the result placed in an int stack slot: stores the XMM
// source to [rsp + $dst$$disp] via the movflt helper. Cost is 95.
11386 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11387   match(Set dst (MoveF2I src));
11388   effect(DEF dst, USE src);
11389 
11390   ins_cost(95); // XXX
11391   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11392   ins_encode %{
11393     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11394   %}
11395   ins_pipe(pipe_slow);
11396 %}
11397 
// MoveI2F with the result placed in a float stack slot: stores the int
// register to [rsp + $dst$$disp] with movl. Cost is 100.
11398 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11399   match(Set dst (MoveI2F src));
11400   effect(DEF dst, USE src);
11401 
11402   ins_cost(100);
11403   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11404   ins_encode %{
11405     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11406   %}
11407   ins_pipe( ialu_mem_reg );
11408 %}
11409 
// MoveD2L with the result placed in a long stack slot: stores the XMM
// source to [rsp + $dst$$disp] via the movdbl helper. Cost is 95.
// The tag in the format string now names this rule (MoveD2L_reg_stack);
// it previously read MoveL2D_reg_stack, which is the name of a different
// rule and made the printed listing misleading.
11410 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11411   match(Set dst (MoveD2L src));
11412   effect(DEF dst, USE src);
11413 
11414   ins_cost(95); // XXX
11415   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11416   ins_encode %{
11417     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11418   %}
11419   ins_pipe(pipe_slow);
11420 %}
11421 
// MoveL2D with the result placed in a double stack slot: stores the long
// register to [rsp + $dst$$disp] with movq. Cost is 100.
11422 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11423   match(Set dst (MoveL2D src));
11424   effect(DEF dst, USE src);
11425 
11426   ins_cost(100);
11427   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11428   ins_encode %{
11429     __ movq(Address(rsp, $dst$$disp), $src$$Register);
11430   %}
11431   ins_pipe(ialu_mem_reg);
11432 %}
11433 
// MoveF2I directly between registers: copies the XMM source into an int
// register with movdl. Cost is 85, lower than the stack-slot variants.
11434 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11435   match(Set dst (MoveF2I src));
11436   effect(DEF dst, USE src);
11437   ins_cost(85);
11438   format %{ "movd    $dst,$src\t# MoveF2I" %}
11439   ins_encode %{
11440     __ movdl($dst$$Register, $src$$XMMRegister);
11441   %}
11442   ins_pipe( pipe_slow );
11443 %}
11444 
// MoveD2L directly between registers: copies the XMM source into a long
// register with movdq. Cost is 85, lower than the stack-slot variants.
// NOTE(review): the format string prints "movd" while the encoder calls
// movdq — confirm which mnemonic the listing is meant to show.
11445 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11446   match(Set dst (MoveD2L src));
11447   effect(DEF dst, USE src);
11448   ins_cost(85);
11449   format %{ "movd    $dst,$src\t# MoveD2L" %}
11450   ins_encode %{
11451     __ movdq($dst$$Register, $src$$XMMRegister);
11452   %}
11453   ins_pipe( pipe_slow );
11454 %}
11455 
// MoveI2F directly between registers: copies the int register into an XMM
// register with movdl. Cost is 100.
11456 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11457   match(Set dst (MoveI2F src));
11458   effect(DEF dst, USE src);
11459   ins_cost(100);
11460   format %{ "movd    $dst,$src\t# MoveI2F" %}
11461   ins_encode %{
11462     __ movdl($dst$$XMMRegister, $src$$Register);
11463   %}
11464   ins_pipe( pipe_slow );
11465 %}
11466 
// MoveL2D directly between registers: copies the long register into an XMM
// register with movdq. Cost is 100.
// NOTE(review): the format string prints "movd" while the encoder calls
// movdq — confirm which mnemonic the listing is meant to show.
11467 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11468   match(Set dst (MoveL2D src));
11469   effect(DEF dst, USE src);
11470   ins_cost(100);
11471   format %{ "movd    $dst,$src\t# MoveL2D" %}
11472   ins_encode %{
11473      __ movdq($dst$$XMMRegister, $src$$Register);
11474   %}
11475   ins_pipe( pipe_slow );
11476 %}
11477 
11478 // Fast clearing of an array
11479 // Small ClearArray non-AVX512.
// Matches (ClearArray cnt base) when the node is not is_large() and
// UseAVX <= 2. cnt and base are consumed and clobbered (USE_KILL), zero and
// cr are killed, and tmp is an XMM temporary. The operand classes
// rcx_RegL / rdi_RegP / rax_RegI are defined outside this chunk (presumably
// pinning cnt, base and zero to rcx, rdi and rax, as the format text assumes).
// All machine code comes from the macro-assembler helper clear_mem, called
// with false for its fifth argument and knoreg for the mask register; the
// format template below is only a textual sketch of the expected sequence,
// switched on UseFastStosb / UseXMMForObjInit.
11480 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11481                   Universe dummy, rFlagsReg cr)
11482 %{
11483   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11484   match(Set dummy (ClearArray cnt base));
11485   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11486 
11487   format %{ $$template
11488     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11489     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11490     $$emit$$"jg      LARGE\n\t"
11491     $$emit$$"dec     rcx\n\t"
11492     $$emit$$"js      DONE\t# Zero length\n\t"
11493     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11494     $$emit$$"dec     rcx\n\t"
11495     $$emit$$"jge     LOOP\n\t"
11496     $$emit$$"jmp     DONE\n\t"
11497     $$emit$$"# LARGE:\n\t"
11498     if (UseFastStosb) {
11499        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11500        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11501     } else if (UseXMMForObjInit) {
11502        $$emit$$"mov     rdi,rax\n\t"
11503        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11504        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11505        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11506        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11507        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11508        $$emit$$"add     0x40,rax\n\t"
11509        $$emit$$"# L_zero_64_bytes:\n\t"
11510        $$emit$$"sub     0x8,rcx\n\t"
11511        $$emit$$"jge     L_loop\n\t"
11512        $$emit$$"add     0x4,rcx\n\t"
11513        $$emit$$"jl      L_tail\n\t"
11514        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11515        $$emit$$"add     0x20,rax\n\t"
11516        $$emit$$"sub     0x4,rcx\n\t"
11517        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11518        $$emit$$"add     0x4,rcx\n\t"
11519        $$emit$$"jle     L_end\n\t"
11520        $$emit$$"dec     rcx\n\t"
11521        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11522        $$emit$$"vmovq   xmm0,(rax)\n\t"
11523        $$emit$$"add     0x8,rax\n\t"
11524        $$emit$$"dec     rcx\n\t"
11525        $$emit$$"jge     L_sloop\n\t"
11526        $$emit$$"# L_end:\n\t"
11527     } else {
11528        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11529     }
11530     $$emit$$"# DONE"
11531   %}
11532   ins_encode %{
11533     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11534                  $tmp$$XMMRegister, false, knoreg);
11535   %}
11536   ins_pipe(pipe_slow);
11537 %}
11538 
11539 // Small ClearArray AVX512 non-constant length.
// Matches (ClearArray cnt base) when the node is not is_large() and
// UseAVX > 2. Compared with rep_stos, tmp is a legRegD operand and an extra
// kReg temporary ktmp is reserved and passed to clear_mem as the mask
// register; cost is 125. cnt and base are USE_KILL, zero and cr are killed.
// All machine code comes from the macro-assembler helper clear_mem, called
// with false for its fifth argument; the format template below is only a
// textual sketch of the expected sequence, switched on UseFastStosb /
// UseXMMForObjInit.
11540 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
11541                        Universe dummy, rFlagsReg cr)
11542 %{
11543   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11544   match(Set dummy (ClearArray cnt base));
11545   ins_cost(125);
11546   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11547 
11548   format %{ $$template
11549     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11550     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11551     $$emit$$"jg      LARGE\n\t"
11552     $$emit$$"dec     rcx\n\t"
11553     $$emit$$"js      DONE\t# Zero length\n\t"
11554     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11555     $$emit$$"dec     rcx\n\t"
11556     $$emit$$"jge     LOOP\n\t"
11557     $$emit$$"jmp     DONE\n\t"
11558     $$emit$$"# LARGE:\n\t"
11559     if (UseFastStosb) {
11560        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11561        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11562     } else if (UseXMMForObjInit) {
11563        $$emit$$"mov     rdi,rax\n\t"
11564        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11565        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11566        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11567        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11568        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11569        $$emit$$"add     0x40,rax\n\t"
11570        $$emit$$"# L_zero_64_bytes:\n\t"
11571        $$emit$$"sub     0x8,rcx\n\t"
11572        $$emit$$"jge     L_loop\n\t"
11573        $$emit$$"add     0x4,rcx\n\t"
11574        $$emit$$"jl      L_tail\n\t"
11575        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11576        $$emit$$"add     0x20,rax\n\t"
11577        $$emit$$"sub     0x4,rcx\n\t"
11578        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11579        $$emit$$"add     0x4,rcx\n\t"
11580        $$emit$$"jle     L_end\n\t"
11581        $$emit$$"dec     rcx\n\t"
11582        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11583        $$emit$$"vmovq   xmm0,(rax)\n\t"
11584        $$emit$$"add     0x8,rax\n\t"
11585        $$emit$$"dec     rcx\n\t"
11586        $$emit$$"jge     L_sloop\n\t"
11587        $$emit$$"# L_end:\n\t"
11588     } else {
11589        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11590     }
11591     $$emit$$"# DONE"
11592   %}
11593   ins_encode %{
11594     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11595                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11596   %}
11597   ins_pipe(pipe_slow);
11598 %}
11599 
11600 // Large ClearArray non-AVX512.
// Matches (ClearArray cnt base) when the node is is_large() and
// UseAVX <= 2. Same operands and effects as rep_stos (cnt/base USE_KILL,
// zero and cr killed, XMM tmp), but clear_mem is called with true for its
// fifth argument and the format template omits the short-array
// (InitArrayShortSize) prologue. The template is only a textual sketch of
// the expected sequence, switched on UseFastStosb / UseXMMForObjInit; the
// real code is produced by the clear_mem helper defined outside this chunk.
11601 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
11602                         Universe dummy, rFlagsReg cr)
11603 %{
11604   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
11605   match(Set dummy (ClearArray cnt base));
11606   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11607 
11608   format %{ $$template
11609     if (UseFastStosb) {
11610        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11611        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11612        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11613     } else if (UseXMMForObjInit) {
11614        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11615        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11616        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11617        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11618        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11619        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11620        $$emit$$"add     0x40,rax\n\t"
11621        $$emit$$"# L_zero_64_bytes:\n\t"
11622        $$emit$$"sub     0x8,rcx\n\t"
11623        $$emit$$"jge     L_loop\n\t"
11624        $$emit$$"add     0x4,rcx\n\t"
11625        $$emit$$"jl      L_tail\n\t"
11626        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11627        $$emit$$"add     0x20,rax\n\t"
11628        $$emit$$"sub     0x4,rcx\n\t"
11629        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11630        $$emit$$"add     0x4,rcx\n\t"
11631        $$emit$$"jle     L_end\n\t"
11632        $$emit$$"dec     rcx\n\t"
11633        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11634        $$emit$$"vmovq   xmm0,(rax)\n\t"
11635        $$emit$$"add     0x8,rax\n\t"
11636        $$emit$$"dec     rcx\n\t"
11637        $$emit$$"jge     L_sloop\n\t"
11638        $$emit$$"# L_end:\n\t"
11639     } else {
11640        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11641        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11642     }
11643   %}
11644   ins_encode %{
11645     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11646                  $tmp$$XMMRegister, true, knoreg);
11647   %}
11648   ins_pipe(pipe_slow);
11649 %}
11650 
// Large ClearArray AVX512.
//
// Selected for ClearArray nodes whose ClearArrayNode::is_large() is true while
// UseAVX > 2. Operands mirror the non-AVX512 rule, except that tmp is a
// legRegD and an additional kReg TEMP (ktmp) is reserved and forwarded to
// clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  // Format text: one of three listings is emitted, chosen by UseFastStosb and
  // then UseXMMForObjInit. The machine code itself comes from the clear_mem
  // call in ins_encode below, not from these strings.
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // NOTE(review): the boolean argument is presumably the "large" flag of
    // clear_mem (the predicate requires is_large()) - confirm against the
    // MacroAssembler declaration. The ktmp operand is passed in the last
    // position as a KRegister.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11701 
// ClearArray with an immediate (immL) element count, for nodes that are not
// is_large(). Only selected when UseAVX > 2 and
// VM_Version::supports_avx512vlbw() holds.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp,
                     Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);

  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    zero_reg = $zero$$Register;
    XMMRegister vec_tmp  = $tmp$$XMMRegister;
    KRegister   mask_tmp = $ktmp$$KRegister;
    // The count is an immediate operand, so it is passed as a constant.
    __ clear_mem(base_reg, $cnt$$constant, zero_reg, vec_tmp, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11716 
// StrComp with encoding LL, used when VM_Version::supports_avx512vlbw() is
// false. Strings and counts sit in fixed registers and are USE_KILLed; the
// result is produced in rax.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1,
                         rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    // This rule has no kReg operand, so knoreg fills the last argument.
    __ string_compare(str1_reg, str2_reg, cnt1_reg, cnt2_reg, result_reg,
                      vec_tmp, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
11732 
// StrComp with encoding LL, used when VM_Version::supports_avx512vlbw() is
// true. Same operand layout as the non-EVEX rule plus a kReg TEMP that is
// forwarded to string_compare.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1,
                              rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(str1_reg, str2_reg, cnt1_reg, cnt2_reg, result_reg,
                      vec_tmp, StrIntrinsicNode::LL, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
11748 
// StrComp with encoding UU, used when VM_Version::supports_avx512vlbw() is
// false. Strings and counts sit in fixed registers and are USE_KILLed; the
// result is produced in rax.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1,
                         rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    // This rule has no kReg operand, so knoreg fills the last argument.
    __ string_compare(str1_reg, str2_reg, cnt1_reg, cnt2_reg, result_reg,
                      vec_tmp, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
11764 
// StrComp with encoding UU, used when VM_Version::supports_avx512vlbw() is
// true. Same operand layout as the non-EVEX rule plus a kReg TEMP that is
// forwarded to string_compare.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1,
                              rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(str1_reg, str2_reg, cnt1_reg, cnt2_reg, result_reg,
                      vec_tmp, StrIntrinsicNode::UU, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
11780 
// StrComp with encoding LU, used when VM_Version::supports_avx512vlbw() is
// false. Strings and counts sit in fixed registers and are USE_KILLed; the
// result is produced in rax.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1,
                          rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    // This rule has no kReg operand, so knoreg fills the last argument.
    __ string_compare(str1_reg, str2_reg, cnt1_reg, cnt2_reg, result_reg,
                      vec_tmp, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
11796 
// StrComp with encoding LU, used when VM_Version::supports_avx512vlbw() is
// true. Same operand layout as the non-EVEX rule plus a kReg TEMP that is
// forwarded to string_compare.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1,
                               rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(str1_reg, str2_reg, cnt1_reg, cnt2_reg, result_reg,
                      vec_tmp, StrIntrinsicNode::LU, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
11812 
// StrComp with encoding UL, used when VM_Version::supports_avx512vlbw() is
// false. Unlike the other StrComp rules, str1/cnt1 are bound to rsi/rdx and
// str2/cnt2 to rdi/rcx, and the pairs are handed to string_compare swapped.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1,
                          rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    // Second string and count go first; knoreg fills the opmask slot.
    __ string_compare(str2_reg, str1_reg, cnt2_reg, cnt1_reg, result_reg,
                      vec_tmp, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
11828 
// StrComp with encoding UL, used when VM_Version::supports_avx512vlbw() is
// true. str1/cnt1 are bound to rsi/rdx and str2/cnt2 to rdi/rcx; the pairs
// are handed to string_compare swapped, together with a kReg TEMP.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1,
                               rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    // Second string and count go first.
    __ string_compare(str2_reg, str1_reg, cnt2_reg, cnt1_reg, result_reg,
                      vec_tmp, StrIntrinsicNode::UL, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
11844 
// StrIndexOf, encoding LL, where the substring length is an immediate
// (int_cnt2). Requires UseSSE42Intrinsics. cnt2 (rax) is not an input here;
// it is only declared KILL and passed on as a register.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1,
                             rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec,
                             rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    Register    tmp_reg    = $tmp$$Register;
    XMMRegister vec_tmp    = $tmp_vec$$XMMRegister;
    const int   sub_len    = static_cast<int>($int_cnt2$$constant);

    if (sub_len < 16) {
      // Short constant substrings go through the generic routine, which
      // loads them through the stack if they cross a page boundary.
      __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                        sub_len, result_reg, vec_tmp, tmp_reg,
                        StrIntrinsicNode::LL);
    } else {
      // Constant substrings of 16 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                          sub_len, result_reg, vec_tmp, tmp_reg,
                          StrIntrinsicNode::LL);
    }
  %}
  ins_pipe(pipe_slow);
%}
11873 
// StrIndexOf, encoding UU, where the substring length is an immediate
// (int_cnt2). Requires UseSSE42Intrinsics. cnt2 (rax) is not an input here;
// it is only declared KILL and passed on as a register.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1,
                             rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec,
                             rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    Register    tmp_reg    = $tmp$$Register;
    XMMRegister vec_tmp    = $tmp_vec$$XMMRegister;
    const int   sub_len    = static_cast<int>($int_cnt2$$constant);

    if (sub_len < 8) {
      // Short constant substrings go through the generic routine, which
      // loads them through the stack if they cross a page boundary.
      __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                        sub_len, result_reg, vec_tmp, tmp_reg,
                        StrIntrinsicNode::UU);
    } else {
      // Constant substrings of 8 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                          sub_len, result_reg, vec_tmp, tmp_reg,
                          StrIntrinsicNode::UU);
    }
  %}
  ins_pipe(pipe_slow);
%}
11902 
// StrIndexOf, encoding UL, where the substring length is an immediate
// (int_cnt2). Requires UseSSE42Intrinsics. cnt2 (rax) is not an input here;
// it is only declared KILL and passed on as a register.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1,
                              rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec,
                              rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    Register    tmp_reg    = $tmp$$Register;
    XMMRegister vec_tmp    = $tmp_vec$$XMMRegister;
    const int   sub_len    = static_cast<int>($int_cnt2$$constant);

    if (sub_len < 8) {
      // Short constant substrings go through the generic routine, which
      // loads them through the stack if they cross a page boundary.
      __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                        sub_len, result_reg, vec_tmp, tmp_reg,
                        StrIntrinsicNode::UL);
    } else {
      // Constant substrings of 8 or more elements do not need to be
      // loaded through the stack.
      __ string_indexofC8(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                          sub_len, result_reg, vec_tmp, tmp_reg,
                          StrIntrinsicNode::UL);
    }
  %}
  ins_pipe(pipe_slow);
%}
11931 
// StrIndexOf, encoding LL, with the substring length in a register (cnt2).
// Requires UseSSE42Intrinsics. string_indexof receives -1 in the position
// where the constant-length rules pass the immediate length.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1,
                         rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    Register    tmp_reg    = $tmp$$Register;
    XMMRegister vec_tmp    = $tmp_vec$$XMMRegister;
    __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                      (-1), result_reg, vec_tmp, tmp_reg,
                      StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_slow);
%}
11948 
// StrIndexOf, encoding UU, with the substring length in a register (cnt2).
// Requires UseSSE42Intrinsics. string_indexof receives -1 in the position
// where the constant-length rules pass the immediate length.
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1,
                         rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    Register    tmp_reg    = $tmp$$Register;
    XMMRegister vec_tmp    = $tmp_vec$$XMMRegister;
    __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                      (-1), result_reg, vec_tmp, tmp_reg,
                      StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_slow);
%}
11965 
// StrIndexOf, encoding UL, with the substring length in a register (cnt2).
// Requires UseSSE42Intrinsics. string_indexof receives -1 in the position
// where the constant-length rules pass the immediate length.
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1,
                          rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt1_reg   = $cnt1$$Register;
    Register    cnt2_reg   = $cnt2$$Register;
    Register    result_reg = $result$$Register;
    Register    tmp_reg    = $tmp$$Register;
    XMMRegister vec_tmp    = $tmp_vec$$XMMRegister;
    __ string_indexof(str1_reg, str2_reg, cnt1_reg, cnt2_reg,
                      (-1), result_reg, vec_tmp, tmp_reg,
                      StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_slow);
%}
11982 
// StrIndexOfChar for encoding U. Requires UseSSE42Intrinsics. The string,
// its count and the searched char are USE_KILLed; three vector temps and
// rcx are reserved as TEMPs.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result,
                             legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3,
                             rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);

  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    Register    str_reg    = $str1$$Register;
    Register    cnt_reg    = $cnt1$$Register;
    Register    ch_reg     = $ch$$Register;
    Register    result_reg = $result$$Register;
    Register    tmp_reg    = $tmp$$Register;
    XMMRegister vec1       = $tmp_vec1$$XMMRegister;
    XMMRegister vec2       = $tmp_vec2$$XMMRegister;
    XMMRegister vec3       = $tmp_vec3$$XMMRegister;
    __ string_indexof_char(str_reg, cnt_reg, ch_reg, result_reg,
                           vec1, vec2, vec3, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
11996 
// StrIndexOfChar for encoding L. Requires UseSSE42Intrinsics. Same operand
// layout as the U rule, but emits through stringL_indexof_char.
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result,
                              legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3,
                              rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);

  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    Register    str_reg    = $str1$$Register;
    Register    cnt_reg    = $cnt1$$Register;
    Register    ch_reg     = $ch$$Register;
    Register    result_reg = $result$$Register;
    Register    tmp_reg    = $tmp$$Register;
    XMMRegister vec1       = $tmp_vec1$$XMMRegister;
    XMMRegister vec2       = $tmp_vec2$$XMMRegister;
    XMMRegister vec3       = $tmp_vec3$$XMMRegister;
    __ stringL_indexof_char(str_reg, cnt_reg, ch_reg, result_reg,
                            vec1, vec2, vec3, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
12010 
// StrEquals when VM_Version::supports_avx512vlbw() is false. Emits through
// arrays_equals with the leading flag false (the AryEq rules in this file
// pass true) and with the explicit count register.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt,
                       rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt_reg    = $cnt$$Register;
    Register    result_reg = $result$$Register;
    Register    tmp_reg    = $tmp3$$Register;
    XMMRegister vec1       = $tmp1$$XMMRegister;
    XMMRegister vec2       = $tmp2$$XMMRegister;
    // This rule has no kReg operand, so knoreg fills the last argument.
    __ arrays_equals(false, str1_reg, str2_reg, cnt_reg, result_reg, tmp_reg,
                     vec1, vec2, false /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
12027 
// StrEquals when VM_Version::supports_avx512vlbw() is true. Same as the
// non-EVEX rule, plus a kReg TEMP that is forwarded to arrays_equals.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt,
                            rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp,
                            rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    Register    str1_reg   = $str1$$Register;
    Register    str2_reg   = $str2$$Register;
    Register    cnt_reg    = $cnt$$Register;
    Register    result_reg = $result$$Register;
    Register    tmp_reg    = $tmp3$$Register;
    XMMRegister vec1       = $tmp1$$XMMRegister;
    XMMRegister vec2       = $tmp2$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    __ arrays_equals(false, str1_reg, str2_reg, cnt_reg, result_reg, tmp_reg,
                     vec1, vec2, false /* char */, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
12043 
// AryEq for encoding LL when VM_Version::supports_avx512vlbw() is false.
// Emits through arrays_equals with the leading flag true; there is no count
// operand, so the rcx temp (tmp3) is passed in the position where
// string_equals passes its count register.
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2,
                       rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register    ary1_reg   = $ary1$$Register;
    Register    ary2_reg   = $ary2$$Register;
    Register    result_reg = $result$$Register;
    Register    rcx_tmp    = $tmp3$$Register;
    Register    rbx_tmp    = $tmp4$$Register;
    XMMRegister vec1       = $tmp1$$XMMRegister;
    XMMRegister vec2       = $tmp2$$XMMRegister;
    // This rule has no kReg operand, so knoreg fills the last argument.
    __ arrays_equals(true, ary1_reg, ary2_reg, rcx_tmp, result_reg, rbx_tmp,
                     vec1, vec2, false /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
12060 
// AryEq for encoding LL when VM_Version::supports_avx512vlbw() is true.
// Same as the non-EVEX rule, plus a kReg TEMP forwarded to arrays_equals.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp,
                            rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register    ary1_reg   = $ary1$$Register;
    Register    ary2_reg   = $ary2$$Register;
    Register    result_reg = $result$$Register;
    Register    rcx_tmp    = $tmp3$$Register;
    Register    rbx_tmp    = $tmp4$$Register;
    XMMRegister vec1       = $tmp1$$XMMRegister;
    XMMRegister vec2       = $tmp2$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    __ arrays_equals(true, ary1_reg, ary2_reg, rcx_tmp, result_reg, rbx_tmp,
                     vec1, vec2, false /* char */, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
12076 
// AryEq for encoding UU when VM_Version::supports_avx512vlbw() is false.
// Identical to the byte[] rule except that the char flag passed to
// arrays_equals is true.
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2,
                       rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register    ary1_reg   = $ary1$$Register;
    Register    ary2_reg   = $ary2$$Register;
    Register    result_reg = $result$$Register;
    Register    rcx_tmp    = $tmp3$$Register;
    Register    rbx_tmp    = $tmp4$$Register;
    XMMRegister vec1       = $tmp1$$XMMRegister;
    XMMRegister vec2       = $tmp2$$XMMRegister;
    // This rule has no kReg operand, so knoreg fills the last argument.
    __ arrays_equals(true, ary1_reg, ary2_reg, rcx_tmp, result_reg, rbx_tmp,
                     vec1, vec2, true /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
12092 
// AryEq for encoding UU when VM_Version::supports_avx512vlbw() is true.
// Same as the non-EVEX char[] rule, plus a kReg TEMP forwarded to
// arrays_equals.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp,
                            rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register    ary1_reg   = $ary1$$Register;
    Register    ary2_reg   = $ary2$$Register;
    Register    result_reg = $result$$Register;
    Register    rcx_tmp    = $tmp3$$Register;
    Register    rbx_tmp    = $tmp4$$Register;
    XMMRegister vec1       = $tmp1$$XMMRegister;
    XMMRegister vec2       = $tmp2$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    __ arrays_equals(true, ary1_reg, ary2_reg, rcx_tmp, result_reg, rbx_tmp,
                     vec1, vec2, true /* char */, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
12108 
// VectorizedHashCode, selected when UseAVX >= 2. result (rbx) is both an
// input of the match rule and its output; basic_type is an immediate that is
// cast to BasicType for the assembler. Thirteen vector temps and three
// general-purpose temps are reserved.
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1,  legRegD tmp_vec2,  legRegD tmp_vec3,
                         legRegD tmp_vec4,  legRegD tmp_vec5,  legRegD tmp_vec6,
                         legRegD tmp_vec7,  legRegD tmp_vec8,  legRegD tmp_vec9,
                         legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13,
                         rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
%{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
  ins_encode %{
    Register ary_reg    = $ary1$$Register;
    Register cnt_reg    = $cnt1$$Register;
    Register result_reg = $result$$Register;
    Register gpr_tmp1   = $tmp1$$Register;
    Register gpr_tmp2   = $tmp2$$Register;
    Register gpr_tmp3   = $tmp3$$Register;
    // Vector temps are passed straight through in declaration order.
    __ arrays_hashcode(ary_reg, cnt_reg, result_reg,
                       gpr_tmp1, gpr_tmp2, gpr_tmp3,
                       $tmp_vec1$$XMMRegister,  $tmp_vec2$$XMMRegister,
                       $tmp_vec3$$XMMRegister,  $tmp_vec4$$XMMRegister,
                       $tmp_vec5$$XMMRegister,  $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister,  $tmp_vec8$$XMMRegister,
                       $tmp_vec9$$XMMRegister,  $tmp_vec10$$XMMRegister,
                       $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister,
                       (BasicType)$basic_type$$constant);
  %}
  ins_pipe(pipe_slow);
%}
12134 
// CountPositives when either VM_Version::supports_avx512vlbw() or
// VM_Version::supports_bmi2() is false. The array pointer (rsi) and length
// (rcx) are USE_KILLed, the result is produced in rax, rbx is declared KILL
// and two vector registers are reserved as TEMPs. Both opmask arguments of
// count_positives are knoreg because this rule has no kReg operands.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12150 
// CountPositives when both VM_Version::supports_avx512vlbw() and
// VM_Version::supports_bmi2() are true. Same operand layout as the non-EVEX
// rule, plus two kReg TEMPs that are forwarded to count_positives.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12166 
// StrCompressedCopy (char[] to byte[] compression) when either
// VM_Version::supports_avx512vlbw() or VM_Version::supports_bmi2() is false.
// src, dst and len are USE_KILLed; the result is produced in rax.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                         legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                         rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    Register    src_reg    = $src$$Register;
    Register    dst_reg    = $dst$$Register;
    Register    len_reg    = $len$$Register;
    Register    tmp_reg    = $tmp5$$Register;
    Register    result_reg = $result$$Register;
    XMMRegister vec1       = $tmp1$$XMMRegister;
    XMMRegister vec2       = $tmp2$$XMMRegister;
    XMMRegister vec3       = $tmp3$$XMMRegister;
    XMMRegister vec4       = $tmp4$$XMMRegister;
    // This rule has no kReg operands, so both opmask arguments are knoreg.
    __ char_array_compress(src_reg, dst_reg, len_reg,
                           vec1, vec2, vec3, vec4,
                           tmp_reg, result_reg,
                           knoreg, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
12184 
// StrCompressedCopy when both VM_Version::supports_avx512vlbw() and
// VM_Version::supports_bmi2() are true. Same as the non-EVEX rule, plus two
// kReg TEMPs that are forwarded to char_array_compress.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                              legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                              kReg ktmp1, kReg ktmp2,
                              rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    Register    src_reg    = $src$$Register;
    Register    dst_reg    = $dst$$Register;
    Register    len_reg    = $len$$Register;
    Register    tmp_reg    = $tmp5$$Register;
    Register    result_reg = $result$$Register;
    XMMRegister vec1       = $tmp1$$XMMRegister;
    XMMRegister vec2       = $tmp2$$XMMRegister;
    XMMRegister vec3       = $tmp3$$XMMRegister;
    XMMRegister vec4       = $tmp4$$XMMRegister;
    KRegister   mask1      = $ktmp1$$KRegister;
    KRegister   mask2      = $ktmp2$$KRegister;
    __ char_array_compress(src_reg, dst_reg, len_reg,
                           vec1, vec2, vec3, vec4,
                           tmp_reg, result_reg,
                           mask1, mask2);
  %}
  ins_pipe(pipe_slow);
%}
12201 // fast byte[] to char[] inflation
// Variant used when AVX512VL/BW or BMI2 is not available (see predicate).
// The node has no meaningful value: the result operand is a Universe dummy.
// src/dst/len (RSI/RDI/RDX) are consumed (USE_KILL); tmp1 (XMM) and tmp2 (RCX)
// are scratch. knoreg is passed because no opmask register is used here.
12202 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12203                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
12204   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12205   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12206   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12207 
12208   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12209   ins_encode %{
12210     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12211                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12212   %}
12213   ins_pipe( pipe_slow );
12214 %}
12215 
// byte[] to char[] inflation, variant selected when both AVX512VL/BW and BMI2
// are available. Identical register contract to string_inflate, plus one TEMP
// opmask register (ktmp) forwarded to byte_array_inflate.
12216 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12217                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
12218   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12219   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12220   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12221 
12222   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12223   ins_encode %{
12224     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12225                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12226   %}
12227   ins_pipe( pipe_slow );
12228 %}
12229 
12230 // encode char[] to byte[] in ISO_8859_1
// Selected when the EncodeISOArray node is NOT flagged is_ascii(); the final
// 'false' argument to encode_iso_array carries that choice to the stub.
// src/dst/len (RSI/RDI/RDX) are consumed, RCX and the flags are killed, and
// the result is produced in RAX.
12231 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12232                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
12233                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
12234   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12235   match(Set result (EncodeISOArray src (Binary dst len)));
12236   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12237 
12238   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
12239   ins_encode %{
12240     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12241                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12242                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12243   %}
12244   ins_pipe( pipe_slow );
12245 %}
12246 
12247 // encode char[] to byte[] in ASCII
// Same match rule and register contract as encode_iso_array, but selected
// when the EncodeISOArray node IS flagged is_ascii(); the shared
// encode_iso_array routine is called with its last argument set to 'true'.
12248 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
12249                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
12250                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
12251   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12252   match(Set result (EncodeISOArray src (Binary dst len)));
12253   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12254 
12255   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
12256   ins_encode %{
12257     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12258                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12259                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12260   %}
12261   ins_pipe( pipe_slow );
12262 %}
12263 
12264 //----------Overflow Math Instructions-----------------------------------------
12265 
// Int add overflow check, register + register. The flags register is the
// result (Set cr); the addl itself overwrites op1, hence USE_KILL on op1
// (which is pinned to RAX by its operand class).
12266 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
12267 %{
12268   match(Set cr (OverflowAddI op1 op2));
12269   effect(DEF cr, USE_KILL op1, USE op2);
12270 
12271   format %{ "addl    $op1, $op2\t# overflow check int" %}
12272 
12273   ins_encode %{
12274     __ addl($op1$$Register, $op2$$Register);
12275   %}
12276   ins_pipe(ialu_reg_reg);
12277 %}
12278 
// Int add overflow check, register + int immediate. Flags are the result;
// op1 (RAX) is overwritten by the addl and therefore declared USE_KILL.
12279 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
12280 %{
12281   match(Set cr (OverflowAddI op1 op2));
12282   effect(DEF cr, USE_KILL op1, USE op2);
12283 
12284   format %{ "addl    $op1, $op2\t# overflow check int" %}
12285 
12286   ins_encode %{
12287     __ addl($op1$$Register, $op2$$constant);
12288   %}
12289   ins_pipe(ialu_reg_reg);
12290 %}
12291 
// Long add overflow check, register + register. Flags are the result;
// op1 (RAX) is overwritten by the addq and therefore declared USE_KILL.
12292 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
12293 %{
12294   match(Set cr (OverflowAddL op1 op2));
12295   effect(DEF cr, USE_KILL op1, USE op2);
12296 
12297   format %{ "addq    $op1, $op2\t# overflow check long" %}
12298   ins_encode %{
12299     __ addq($op1$$Register, $op2$$Register);
12300   %}
12301   ins_pipe(ialu_reg_reg);
12302 %}
12303 
// Long add overflow check, register + immediate. The immediate operand class
// is immL32 (a long constant usable as a 32-bit immediate). op1 (RAX) is
// overwritten by the addq and therefore declared USE_KILL.
12304 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
12305 %{
12306   match(Set cr (OverflowAddL op1 op2));
12307   effect(DEF cr, USE_KILL op1, USE op2);
12308 
12309   format %{ "addq    $op1, $op2\t# overflow check long" %}
12310   ins_encode %{
12311     __ addq($op1$$Register, $op2$$constant);
12312   %}
12313   ins_pipe(ialu_reg_reg);
12314 %}
12315 
// Int subtract overflow check, register - register. Implemented with cmpl
// instead of a real subtract, so no operand is declared killed and op1 may
// live in any int register.
12316 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
12317 %{
12318   match(Set cr (OverflowSubI op1 op2));
12319 
12320   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
12321   ins_encode %{
12322     __ cmpl($op1$$Register, $op2$$Register);
12323   %}
12324   ins_pipe(ialu_reg_reg);
12325 %}
12326 
// Int subtract overflow check, register - int immediate. Uses cmpl, so the
// register operand is only read.
12327 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
12328 %{
12329   match(Set cr (OverflowSubI op1 op2));
12330 
12331   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
12332   ins_encode %{
12333     __ cmpl($op1$$Register, $op2$$constant);
12334   %}
12335   ins_pipe(ialu_reg_reg);
12336 %}
12337 
// Long subtract overflow check, register - register. Uses cmpq, so both
// operands are only read.
12338 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12339 %{
12340   match(Set cr (OverflowSubL op1 op2));
12341 
12342   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
12343   ins_encode %{
12344     __ cmpq($op1$$Register, $op2$$Register);
12345   %}
12346   ins_pipe(ialu_reg_reg);
12347 %}
12348 
// Long subtract overflow check, register - immL32 immediate. Uses cmpq, so
// the register operand is only read.
12349 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12350 %{
12351   match(Set cr (OverflowSubL op1 op2));
12352 
12353   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
12354   ins_encode %{
12355     __ cmpq($op1$$Register, $op2$$constant);
12356   %}
12357   ins_pipe(ialu_reg_reg);
12358 %}
12359 
// Int negate overflow check: matches the (0 - op2) shape of OverflowSubI
// (first input is the immI_0 constant) and emits a single negl. The negl
// overwrites op2 (RAX), hence USE_KILL.
12360 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
12361 %{
12362   match(Set cr (OverflowSubI zero op2));
12363   effect(DEF cr, USE_KILL op2);
12364 
12365   format %{ "negl    $op2\t# overflow check int" %}
12366   ins_encode %{
12367     __ negl($op2$$Register);
12368   %}
12369   ins_pipe(ialu_reg_reg);
12370 %}
12371 
// Long negate overflow check: matches the (0 - op2) shape of OverflowSubL
// (first input is the immL0 constant) and emits a single negq. The negq
// overwrites op2 (RAX), hence USE_KILL.
12372 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
12373 %{
12374   match(Set cr (OverflowSubL zero op2));
12375   effect(DEF cr, USE_KILL op2);
12376 
12377   format %{ "negq    $op2\t# overflow check long" %}
12378   ins_encode %{
12379     __ negq($op2$$Register);
12380   %}
12381   ins_pipe(ialu_reg_reg);
12382 %}
12383 
// Int multiply overflow check, register * register. The two-operand imull
// writes its product into op1 (RAX), hence USE_KILL on op1.
12384 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
12385 %{
12386   match(Set cr (OverflowMulI op1 op2));
12387   effect(DEF cr, USE_KILL op1, USE op2);
12388 
12389   format %{ "imull    $op1, $op2\t# overflow check int" %}
12390   ins_encode %{
12391     __ imull($op1$$Register, $op2$$Register);
12392   %}
12393   ins_pipe(ialu_reg_reg_alu0);
12394 %}
12395 
// Int multiply overflow check, register * int immediate. Uses the
// three-operand imull form, which writes the product to a separate TEMP
// register, so op1 is only read and need not be pinned to RAX.
12396 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
12397 %{
12398   match(Set cr (OverflowMulI op1 op2));
12399   effect(DEF cr, TEMP tmp, USE op1, USE op2);
12400 
12401   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
12402   ins_encode %{
12403     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
12404   %}
12405   ins_pipe(ialu_reg_reg_alu0);
12406 %}
12407 
// Long multiply overflow check, register * register. The two-operand imulq
// writes its product into op1 (RAX), hence USE_KILL on op1.
12408 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
12409 %{
12410   match(Set cr (OverflowMulL op1 op2));
12411   effect(DEF cr, USE_KILL op1, USE op2);
12412 
12413   format %{ "imulq    $op1, $op2\t# overflow check long" %}
12414   ins_encode %{
12415     __ imulq($op1$$Register, $op2$$Register);
12416   %}
12417   ins_pipe(ialu_reg_reg_alu0);
12418 %}
12419 
// Long multiply overflow check, register * immL32 immediate. Uses the
// three-operand imulq form, which writes the product to a separate TEMP
// register, so op1 is only read.
12420 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
12421 %{
12422   match(Set cr (OverflowMulL op1 op2));
12423   effect(DEF cr, TEMP tmp, USE op1, USE op2);
12424 
12425   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
12426   ins_encode %{
12427     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
12428   %}
12429   ins_pipe(ialu_reg_reg_alu0);
12430 %}
12431 
12432 
12433 //----------Control Flow Instructions------------------------------------------
12434 // Signed compare Instructions
12435 
12436 // XXX more variants!!
// Signed int compare, register vs register: emits cmpl and defines the
// signed flags register. Also invoked by name from the expand rules of
// minI_rReg / maxI_rReg, which is why the operand effects are spelled out.
12437 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
12438 %{
12439   match(Set cr (CmpI op1 op2));
12440   effect(DEF cr, USE op1, USE op2);
12441 
12442   format %{ "cmpl    $op1, $op2" %}
12443   ins_encode %{
12444     __ cmpl($op1$$Register, $op2$$Register);
12445   %}
12446   ins_pipe(ialu_cr_reg_reg);
12447 %}
12448 
// Signed int compare, register vs int immediate (cmpl reg, imm).
12449 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
12450 %{
12451   match(Set cr (CmpI op1 op2));
12452 
12453   format %{ "cmpl    $op1, $op2" %}
12454   ins_encode %{
12455     __ cmpl($op1$$Register, $op2$$constant);
12456   %}
12457   ins_pipe(ialu_cr_reg_imm);
12458 %}
12459 
// Signed int compare, register vs memory: folds the LoadI into the cmpl as a
// memory operand. Carries an explicit ins_cost of 500 (marked XXX, i.e. the
// value is provisional).
12460 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
12461 %{
12462   match(Set cr (CmpI op1 (LoadI op2)));
12463 
12464   ins_cost(500); // XXX
12465   format %{ "cmpl    $op1, $op2" %}
12466   ins_encode %{
12467     __ cmpl($op1$$Register, $op2$$Address);
12468   %}
12469   ins_pipe(ialu_cr_reg_mem);
12470 %}
12471 
// Signed int compare against the constant zero: emitted as testl src, src
// instead of cmpl src, 0. The 'zero' operand exists only to match immI_0.
12472 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
12473 %{
12474   match(Set cr (CmpI src zero));
12475 
12476   format %{ "testl   $src, $src" %}
12477   ins_encode %{
12478     __ testl($src$$Register, $src$$Register);
12479   %}
12480   ins_pipe(ialu_cr_reg_imm);
12481 %}
12482 
// Fuses (src & con) compared with zero into a single testl src, con, so the
// AndI result never needs a register of its own.
12483 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
12484 %{
12485   match(Set cr (CmpI (AndI src con) zero));
12486 
12487   format %{ "testl   $src, $con" %}
12488   ins_encode %{
12489     __ testl($src$$Register, $con$$constant);
12490   %}
12491   ins_pipe(ialu_cr_reg_imm);
12492 %}
12493 
// Fuses (src1 & src2) compared with zero into a single testl src1, src2.
12494 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
12495 %{
12496   match(Set cr (CmpI (AndI src1 src2) zero));
12497 
12498   format %{ "testl   $src1, $src2" %}
12499   ins_encode %{
12500     __ testl($src1$$Register, $src2$$Register);
12501   %}
12502   ins_pipe(ialu_cr_reg_imm);
12503 %}
12504 
// Fuses (src & LoadI mem) compared with zero into a single testl with a
// memory operand.
12505 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
12506 %{
12507   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
12508 
12509   format %{ "testl   $src, $mem" %}
12510   ins_encode %{
12511     __ testl($src$$Register, $mem$$Address);
12512   %}
12513   ins_pipe(ialu_cr_reg_mem);
12514 %}
12515 
12516 // Unsigned compare Instructions; really, same as signed except they
12517 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare, register vs register. Emits the same cmpl as the
// signed form; only the result operand class (rFlagsRegU) differs.
12518 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
12519 %{
12520   match(Set cr (CmpU op1 op2));
12521 
12522   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12523   ins_encode %{
12524     __ cmpl($op1$$Register, $op2$$Register);
12525   %}
12526   ins_pipe(ialu_cr_reg_reg);
12527 %}
12528 
// Unsigned int compare, register vs int immediate; result is rFlagsRegU.
12529 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
12530 %{
12531   match(Set cr (CmpU op1 op2));
12532 
12533   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12534   ins_encode %{
12535     __ cmpl($op1$$Register, $op2$$constant);
12536   %}
12537   ins_pipe(ialu_cr_reg_imm);
12538 %}
12539 
// Unsigned int compare, register vs memory: folds the LoadI into the cmpl.
// Carries the same provisional ins_cost(500) as compI_rReg_mem.
12540 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
12541 %{
12542   match(Set cr (CmpU op1 (LoadI op2)));
12543 
12544   ins_cost(500); // XXX
12545   format %{ "cmpl    $op1, $op2\t# unsigned" %}
12546   ins_encode %{
12547     __ cmpl($op1$$Register, $op2$$Address);
12548   %}
12549   ins_pipe(ialu_cr_reg_mem);
12550 %}
12551 
12552 // // // Cisc-spilled version of cmpU_rReg
12553 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
12554 // //%{
12555 // //  match(Set cr (CmpU (LoadI op1) op2));
12556 // //
12557 // //  format %{ "CMPu   $op1,$op2" %}
12558 // //  ins_cost(500);
12559 // //  opcode(0x39);  /* Opcode 39 /r */
12560 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12561 // //%}
12562 
// Unsigned int compare against zero: emitted as testl src, src; result is
// rFlagsRegU.
12563 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
12564 %{
12565   match(Set cr (CmpU src zero));
12566 
12567   format %{ "testl   $src, $src\t# unsigned" %}
12568   ins_encode %{
12569     __ testl($src$$Register, $src$$Register);
12570   %}
12571   ins_pipe(ialu_cr_reg_imm);
12572 %}
12573 
// Pointer compare, register vs register: 64-bit cmpq producing unsigned
// flags (rFlagsRegU).
12574 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
12575 %{
12576   match(Set cr (CmpP op1 op2));
12577 
12578   format %{ "cmpq    $op1, $op2\t# ptr" %}
12579   ins_encode %{
12580     __ cmpq($op1$$Register, $op2$$Register);
12581   %}
12582   ins_pipe(ialu_cr_reg_reg);
12583 %}
12584 
// Pointer compare, register vs memory: folds the LoadP into the cmpq. Only
// applies when the load (input 2 of the CmpP) has barrier_data() == 0, i.e.
// the load carries no GC barrier data that would have to be honoured.
12585 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
12586 %{
12587   match(Set cr (CmpP op1 (LoadP op2)));
12588   predicate(n->in(2)->as_Load()->barrier_data() == 0);
12589 
12590   ins_cost(500); // XXX
12591   format %{ "cmpq    $op1, $op2\t# ptr" %}
12592   ins_encode %{
12593     __ cmpq($op1$$Register, $op2$$Address);
12594   %}
12595   ins_pipe(ialu_cr_reg_mem);
12596 %}
12597 
12598 // // // Cisc-spilled version of cmpP_rReg
12599 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12600 // //%{
12601 // //  match(Set cr (CmpP (LoadP op1) op2));
12602 // //
12603 // //  format %{ "CMPu   $op1,$op2" %}
12604 // //  ins_cost(500);
12605 // //  opcode(0x39);  /* Opcode 39 /r */
12606 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12607 // //%}
12608 
12609 // XXX this is generalized by compP_rReg_mem???
12610 // Compare raw pointer (used in out-of-heap check).
12611 // Only works because non-oop pointers must be raw pointers
12612 // and raw pointers have no anti-dependencies.
// Same match rule and encoding as compP_rReg_mem, but with a stricter
// predicate (the loaded address must have reloc() == relocInfo::none, in
// addition to barrier_data() == 0) and no explicit ins_cost.
12613 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
12614 %{
12615   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
12616             n->in(2)->as_Load()->barrier_data() == 0);
12617   match(Set cr (CmpP op1 (LoadP op2)));
12618 
12619   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
12620   ins_encode %{
12621     __ cmpq($op1$$Register, $op2$$Address);
12622   %}
12623   ins_pipe(ialu_cr_reg_mem);
12624 %}
12625 
12626 // This will generate a signed flags result. This should be OK since
12627 // any compare to a zero should be eq/neq.
// Pointer null check on a register: testq src, src. Note the result operand
// is the signed rFlagsReg, unlike the other CmpP rules in this group.
12628 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
12629 %{
12630   match(Set cr (CmpP src zero));
12631 
12632   format %{ "testq   $src, $src\t# ptr" %}
12633   ins_encode %{
12634     __ testq($src$$Register, $src$$Register);
12635   %}
12636   ins_pipe(ialu_cr_reg_imm);
12637 %}
12638 
12639 // This will generate a signed flags result. This should be OK since
12640 // any compare to a zero should be eq/neq.
// Pointer null check directly on memory. Used when compressed oops are off
// or the compressed-oops base is non-null (the complementary case is handled
// by testP_mem_reg0), and only for loads with barrier_data() == 0.
// NOTE(review): the format string prints a 64-bit all-ones mask while the
// encoding passes 0xFFFFFFFF; presumably the assembler sign-extends the
// 32-bit immediate for testq - confirm against Assembler::testq.
12641 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
12642 %{
12643   predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
12644             n->in(1)->as_Load()->barrier_data() == 0);
12645   match(Set cr (CmpP (LoadP op) zero));
12646 
12647   ins_cost(500); // XXX
12648   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
12649   ins_encode %{
12650     __ testq($op$$Address, 0xFFFFFFFF);
12651   %}
12652   ins_pipe(ialu_cr_reg_imm);
12653 %}
12654 
// Pointer null check directly on memory for the case where compressed oops
// are enabled with a null base: per the format string R12 (heap base) is
// then zero, so it is compared against the memory operand instead of using
// an immediate. Only for loads with barrier_data() == 0.
12655 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12656 %{
12657   predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
12658             n->in(1)->as_Load()->barrier_data() == 0);
12659   match(Set cr (CmpP (LoadP mem) zero));
12660 
12661   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
12662   ins_encode %{
12663     __ cmpq(r12, $mem$$Address);
12664   %}
12665   ins_pipe(ialu_cr_reg_mem);
12666 %}
12667 
// Compressed (narrow) pointer compare, register vs register: 32-bit cmpl
// producing unsigned flags.
12668 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12669 %{
12670   match(Set cr (CmpN op1 op2));
12671 
12672   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12673   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12674   ins_pipe(ialu_cr_reg_reg);
12675 %}
12676 
// Compressed pointer compare, register vs memory: folds the LoadN into the
// cmpl as a memory operand.
12677 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12678 %{
12679   match(Set cr (CmpN src (LoadN mem)));
12680 
12681   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
12682   ins_encode %{
12683     __ cmpl($src$$Register, $mem$$Address);
12684   %}
12685   ins_pipe(ialu_cr_reg_mem);
12686 %}
12687 
// Compressed pointer compare, register vs narrow-oop constant. The constant
// is handed to cmp_narrow_oop as a jobject rather than emitted as a plain
// immediate.
12688 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
12689   match(Set cr (CmpN op1 op2));
12690 
12691   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12692   ins_encode %{
12693     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
12694   %}
12695   ins_pipe(ialu_cr_reg_imm);
12696 %}
12697 
// Compressed pointer compare, memory vs narrow-oop constant. In the match
// rule the constant is the first CmpN input, while the emitted instruction
// takes the memory operand first (see the format string).
12698 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
12699 %{
12700   match(Set cr (CmpN src (LoadN mem)));
12701 
12702   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
12703   ins_encode %{
12704     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12705   %}
12706   ins_pipe(ialu_cr_reg_mem);
12707 %}
12708 
// Compressed klass pointer compare, register vs narrow-klass constant. The
// constant is handed to cmp_narrow_klass as a Klass*.
12709 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
12710   match(Set cr (CmpN op1 op2));
12711 
12712   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
12713   ins_encode %{
12714     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
12715   %}
12716   ins_pipe(ialu_cr_reg_imm);
12717 %}
12718 
// Compressed klass pointer compare, memory vs narrow-klass constant: folds
// the LoadNKlass into the compare. Disabled when UseCompactObjectHeaders is
// set.
// NOTE(review): presumably because the narrow klass is then not stored as a
// plain 32-bit field that can be compared in place - confirm.
12719 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
12720 %{
12721   predicate(!UseCompactObjectHeaders);
12722   match(Set cr (CmpN src (LoadNKlass mem)));
12723 
12724   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
12725   ins_encode %{
12726     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
12727   %}
12728   ins_pipe(ialu_cr_reg_mem);
12729 %}
12730 
// Compressed pointer null check on a register: testl src, src.
12731 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
12732   match(Set cr (CmpN src zero));
12733 
12734   format %{ "testl   $src, $src\t# compressed ptr" %}
12735   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12736   ins_pipe(ialu_cr_reg_imm);
12737 %}
12738 
// Compressed pointer null check directly on memory, used when the
// compressed-oops base is non-null (the base == NULL case is handled by
// testN_mem_reg0, which can compare against R12).
//
// The check must be a TEST against an all-ones mask: ZF is then set exactly
// when the 32-bit slot is zero, which is what CmpN(..., zero) requires and
// what the format string advertises. A CMP against -1 would set ZF only when
// the slot holds 0xFFFFFFFF, i.e. it would report the wrong result for null.
12739 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12740 %{
12741   predicate(CompressedOops::base() != NULL);
12742   match(Set cr (CmpN (LoadN mem) zero));
12743 
12744   ins_cost(500); // XXX
12745   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
12746   ins_encode %{
12747     __ testl($mem$$Address, (int)0xFFFFFFFF);
12748   %}
12749   ins_pipe(ialu_cr_reg_mem);
12750 %}
12751 
// Compressed pointer null check directly on memory for the case where the
// compressed-oops base is NULL: per the format string R12 (heap base) is
// then zero, so the slot is compared against R12 instead of an immediate.
12752 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12753 %{
12754   predicate(CompressedOops::base() == NULL);
12755   match(Set cr (CmpN (LoadN mem) zero));
12756 
12757   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12758   ins_encode %{
12759     __ cmpl(r12, $mem$$Address);
12760   %}
12761   ins_pipe(ialu_cr_reg_mem);
12762 %}
12763 
12764 // Yanked all unsigned pointer compare operations.
12765 // Pointer compares are done with CmpP which is already unsigned.
12766 
// Signed long compare, register vs register (cmpq).
12767 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12768 %{
12769   match(Set cr (CmpL op1 op2));
12770 
12771   format %{ "cmpq    $op1, $op2" %}
12772   ins_encode %{
12773     __ cmpq($op1$$Register, $op2$$Register);
12774   %}
12775   ins_pipe(ialu_cr_reg_reg);
12776 %}
12777 
// Signed long compare, register vs immL32 immediate (cmpq reg, imm).
12778 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12779 %{
12780   match(Set cr (CmpL op1 op2));
12781 
12782   format %{ "cmpq    $op1, $op2" %}
12783   ins_encode %{
12784     __ cmpq($op1$$Register, $op2$$constant);
12785   %}
12786   ins_pipe(ialu_cr_reg_imm);
12787 %}
12788 
// Signed long compare, register vs memory: folds the LoadL into the cmpq.
12789 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12790 %{
12791   match(Set cr (CmpL op1 (LoadL op2)));
12792 
12793   format %{ "cmpq    $op1, $op2" %}
12794   ins_encode %{
12795     __ cmpq($op1$$Register, $op2$$Address);
12796   %}
12797   ins_pipe(ialu_cr_reg_mem);
12798 %}
12799 
// Signed long compare against zero: emitted as testq src, src.
12800 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12801 %{
12802   match(Set cr (CmpL src zero));
12803 
12804   format %{ "testq   $src, $src" %}
12805   ins_encode %{
12806     __ testq($src$$Register, $src$$Register);
12807   %}
12808   ins_pipe(ialu_cr_reg_imm);
12809 %}
12810 
// Fuses (src & con) compared with zero into a single testq src, con. The
// mask must be an immL32 constant.
12811 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12812 %{
12813   match(Set cr (CmpL (AndL src con) zero));
12814 
12815   format %{ "testq   $src, $con\t# long" %}
12816   ins_encode %{
12817     __ testq($src$$Register, $con$$constant);
12818   %}
12819   ins_pipe(ialu_cr_reg_imm);
12820 %}
12821 
// Fuses (src1 & src2) compared with zero into a single testq src1, src2.
12822 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
12823 %{
12824   match(Set cr (CmpL (AndL src1 src2) zero));
12825 
12826   format %{ "testq   $src1, $src2\t# long" %}
12827   ins_encode %{
12828     __ testq($src1$$Register, $src2$$Register);
12829   %}
12830   ins_pipe(ialu_cr_reg_imm);
12831 %}
12832 
// Fuses (src & LoadL mem) compared with zero into a single testq with a
// memory operand.
12833 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12834 %{
12835   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12836 
12837   format %{ "testq   $src, $mem" %}
12838   ins_encode %{
12839     __ testq($src$$Register, $mem$$Address);
12840   %}
12841   ins_pipe(ialu_cr_reg_mem);
12842 %}
12843 
// Variant of testL_reg_mem where the register side is a pointer reinterpreted
// as a long (CastP2X src); the cast is absorbed into the match, so the same
// testq reg, mem is emitted with no extra instruction.
12844 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
12845 %{
12846   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
12847 
12848   format %{ "testq   $src, $mem" %}
12849   ins_encode %{
12850     __ testq($src$$Register, $mem$$Address);
12851   %}
12852   ins_pipe(ialu_cr_reg_mem);
12853 %}
12854 
12855 // Manifest a CmpU result in an integer register.  Very painful.
12856 // This is the test to avoid.
// Three-way unsigned int compare materialized in dst:
//   dst = -1 if src1 is below src2 (unsigned), else 0 if equal, else 1.
// Sequence: preload -1, branch out on 'below'; otherwise setne writes 0/1
// into the low byte and movzbl clears the upper bits left over from the -1.
// The flags are clobbered (KILL flags).
12857 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
12858 %{
12859   match(Set dst (CmpU3 src1 src2));
12860   effect(KILL flags);
12861 
12862   ins_cost(275); // XXX
12863   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
12864             "movl    $dst, -1\n\t"
12865             "jb,u    done\n\t"
12866             "setne   $dst\n\t"
12867             "movzbl  $dst, $dst\n\t"
12868     "done:" %}
12869   ins_encode %{
12870     Label done;
12871     __ cmpl($src1$$Register, $src2$$Register);
12872     __ movl($dst$$Register, -1);
12873     __ jccb(Assembler::below, done);
12874     __ setb(Assembler::notZero, $dst$$Register);
12875     __ movzbl($dst$$Register, $dst$$Register);
12876     __ bind(done);
12877   %}
12878   ins_pipe(pipe_slow);
12879 %}
12880 
12881 // Manifest a CmpL result in an integer register.  Very painful.
12882 // This is the test to avoid.
// Three-way signed long compare materialized in dst:
//   dst = -1 if src1 < src2 (signed), else 0 if equal, else 1.
// Sequence: preload -1, branch out on 'less'; otherwise setne writes 0/1
// into the low byte and movzbl clears the upper bits left over from the -1.
// The flags are clobbered (KILL flags).
12883 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12884 %{
12885   match(Set dst (CmpL3 src1 src2));
12886   effect(KILL flags);
12887 
12888   ins_cost(275); // XXX
12889   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12890             "movl    $dst, -1\n\t"
12891             "jl,s    done\n\t"
12892             "setne   $dst\n\t"
12893             "movzbl  $dst, $dst\n\t"
12894     "done:" %}
12895   ins_encode %{
12896     Label done;
12897     __ cmpq($src1$$Register, $src2$$Register);
12898     __ movl($dst$$Register, -1);
12899     __ jccb(Assembler::less, done);
12900     __ setb(Assembler::notZero, $dst$$Register);
12901     __ movzbl($dst$$Register, $dst$$Register);
12902     __ bind(done);
12903   %}
12904   ins_pipe(pipe_slow);
12905 %}
12906 
12907 // Manifest a CmpUL result in an integer register.  Very painful.
12908 // This is the test to avoid.
// Three-way unsigned long compare materialized in dst:
//   dst = -1 if src1 is below src2 (unsigned), else 0 if equal, else 1.
// Sequence: preload -1, branch out on 'below'; otherwise setne writes 0/1
// into the low byte and movzbl clears the upper bits left over from the -1.
// The flags are clobbered (KILL flags).
12909 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12910 %{
12911   match(Set dst (CmpUL3 src1 src2));
12912   effect(KILL flags);
12913 
12914   ins_cost(275); // XXX
12915   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
12916             "movl    $dst, -1\n\t"
12917             "jb,u    done\n\t"
12918             "setne   $dst\n\t"
12919             "movzbl  $dst, $dst\n\t"
12920     "done:" %}
12921   ins_encode %{
12922     Label done;
12923     __ cmpq($src1$$Register, $src2$$Register);
12924     __ movl($dst$$Register, -1);
12925     __ jccb(Assembler::below, done);
12926     __ setb(Assembler::notZero, $dst$$Register);
12927     __ movzbl($dst$$Register, $dst$$Register);
12928     __ bind(done);
12929   %}
12930   ins_pipe(pipe_slow);
12931 %}
12932 
12933 // Unsigned long compare Instructions; really, same as signed long except they
12934 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned long compare, register vs register. Emits the same cmpq as the
// signed form; only the result operand class (rFlagsRegU) differs.
12935 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
12936 %{
12937   match(Set cr (CmpUL op1 op2));
12938 
12939   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12940   ins_encode %{
12941     __ cmpq($op1$$Register, $op2$$Register);
12942   %}
12943   ins_pipe(ialu_cr_reg_reg);
12944 %}
12945 
// Unsigned long compare, register vs immL32 immediate; result is rFlagsRegU.
12946 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
12947 %{
12948   match(Set cr (CmpUL op1 op2));
12949 
12950   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12951   ins_encode %{
12952     __ cmpq($op1$$Register, $op2$$constant);
12953   %}
12954   ins_pipe(ialu_cr_reg_imm);
12955 %}
12956 
// Unsigned long compare, register vs memory: folds the LoadL into the cmpq.
12957 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
12958 %{
12959   match(Set cr (CmpUL op1 (LoadL op2)));
12960 
12961   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12962   ins_encode %{
12963     __ cmpq($op1$$Register, $op2$$Address);
12964   %}
12965   ins_pipe(ialu_cr_reg_mem);
12966 %}
12967 
// Unsigned long compare against zero: emitted as testq src, src; result is
// rFlagsRegU.
12968 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
12969 %{
12970   match(Set cr (CmpUL src zero));
12971 
12972   format %{ "testq   $src, $src\t# unsigned" %}
12973   ins_encode %{
12974     __ testq($src$$Register, $src$$Register);
12975   %}
12976   ins_pipe(ialu_cr_reg_imm);
12977 %}
12978 
// Compares a signed byte in memory with an 8-bit immediate using a single
// cmpb, avoiding a separate LoadB into a register.
12979 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
12980 %{
12981   match(Set cr (CmpI (LoadB mem) imm));
12982 
12983   ins_cost(125);
12984   format %{ "cmpb    $mem, $imm" %}
12985   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
12986   ins_pipe(ialu_cr_reg_mem);
12987 %}
12988 
// Tests bits of an unsigned byte in memory against an immediate mask with a
// single testb. The mask operand class is immU7.
// NOTE(review): presumably restricted to 7 bits so the sign bit of the
// byte-wide test result cannot differ from the int-wide one - confirm.
12989 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
12990 %{
12991   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
12992 
12993   ins_cost(125);
12994   format %{ "testb   $mem, $imm\t# ubyte" %}
12995   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
12996   ins_pipe(ialu_cr_reg_mem);
12997 %}
12998 
// Tests bits of a signed byte in memory against an 8-bit immediate mask with
// a single testb.
12999 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
13000 %{
13001   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
13002 
13003   ins_cost(125);
13004   format %{ "testb   $mem, $imm\t# byte" %}
13005   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
13006   ins_pipe(ialu_cr_reg_mem);
13007 %}
13008 
13009 //----------Max and Min--------------------------------------------------------
13010 // Min Instructions
13011 
// Helper with no match rule: it is only instantiated from minI_rReg's expand
// block. Given flags from comparing dst with src, it replaces dst with src
// when dst was greater, leaving the minimum in dst.
13012 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
13013 %{
13014   effect(USE_DEF dst, USE src, USE cr);
13015 
13016   format %{ "cmovlgt $dst, $src\t# min" %}
13017   ins_encode %{
13018     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
13019   %}
13020   ins_pipe(pipe_cmov_reg);
13021 %}
13022 
13023 
// Signed int minimum, computed in place in dst. Expands into two
// instructions: compI_rReg (cmpl dst, src) followed by cmovI_reg_g
// (conditional move of src into dst when dst was greater).
13024 instruct minI_rReg(rRegI dst, rRegI src)
13025 %{
13026   match(Set dst (MinI dst src));
13027 
13028   ins_cost(200);
13029   expand %{
13030     rFlagsReg cr;
13031     compI_rReg(cr, dst, src);
13032     cmovI_reg_g(dst, src, cr);
13033   %}
13034 %}
13035 
// Helper with no match rule: it is only instantiated from maxI_rReg's expand
// block. Given flags from comparing dst with src, it replaces dst with src
// when dst was less, leaving the maximum in dst.
13036 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
13037 %{
13038   effect(USE_DEF dst, USE src, USE cr);
13039 
13040   format %{ "cmovllt $dst, $src\t# max" %}
13041   ins_encode %{
13042     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
13043   %}
13044   ins_pipe(pipe_cmov_reg);
13045 %}
13046 
13047 
// Signed int maximum, computed in place in dst. Expands into two
// instructions: compI_rReg (cmpl dst, src) followed by cmovI_reg_l
// (conditional move of src into dst when dst was less).
13048 instruct maxI_rReg(rRegI dst, rRegI src)
13049 %{
13050   match(Set dst (MaxI dst src));
13051 
13052   ins_cost(200);
13053   expand %{
13054     rFlagsReg cr;
13055     compI_rReg(cr, dst, src);
13056     cmovI_reg_l(dst, src, cr);
13057   %}
13058 %}
13059 
13060 // ============================================================================
13061 // Branch Instructions
13062 
13063 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional jump for Goto. The long form is always emitted (second jmp
// argument is false), which is what makes the declared fixed size(5) valid.
13064 instruct jmpDir(label labl)
13065 %{
13066   match(Goto);
13067   effect(USE labl);
13068 
13069   ins_cost(300);
13070   format %{ "jmp     $labl" %}
13071   size(5);
13072   ins_encode %{
13073     Label* L = $labl$$label;
13074     __ jmp(*L, false); // Always long jump
13075   %}
13076   ins_pipe(pipe_jmp);
13077 %}
13078 
13079 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch for If on signed flags. The condition code comes from
// the cmpOp operand; the long jcc form is always emitted to match size(6).
13080 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
13081 %{
13082   match(If cop cr);
13083   effect(USE labl);
13084 
13085   ins_cost(300);
13086   format %{ "j$cop     $labl" %}
13087   size(6);
13088   ins_encode %{
13089     Label* L = $labl$$label;
13090     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13091   %}
13092   ins_pipe(pipe_jcc);
13093 %}
13094 
13095 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch closing a counted loop (CountedLoopEnd) on signed
// flags. Same encoding as jmpCon: always the long jcc form, size(6).
13096 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
13097 %{
13098   match(CountedLoopEnd cop cr);
13099   effect(USE labl);
13100 
13101   ins_cost(300);
13102   format %{ "j$cop     $labl\t# loop end" %}
13103   size(6);
13104   ins_encode %{
13105     Label* L = $labl$$label;
13106     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13107   %}
13108   ins_pipe(pipe_jcc);
13109 %}
13110 
13111 // Jump Direct Conditional - using unsigned comparison
// Conditional branch for If on unsigned flags (rFlagsRegU with a cmpOpU
// condition). Always emits the long jcc form to match size(6).
13112 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13113   match(If cop cmp);
13114   effect(USE labl);
13115 
13116   ins_cost(300);
13117   format %{ "j$cop,u   $labl" %}
13118   size(6);
13119   ins_encode %{
13120     Label* L = $labl$$label;
13121     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13122   %}
13123   ins_pipe(pipe_jcc);
13124 %}
13125 
// Conditional branch for If on rFlagsRegUCF flags with a cmpOpUCF condition.
// Encoded as a single long jcc (size 6) and given a lower cost (200) than
// jmpConU (300).
13126 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
13127   match(If cop cmp);
13128   effect(USE labl);
13129 
13130   ins_cost(200);
13131   format %{ "j$cop,u   $labl" %}
13132   size(6);
13133   ins_encode %{
13134     Label* L = $labl$$label;
13135     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13136   %}
13137   ins_pipe(pipe_jcc);
13138 %}
13139 
// Conditional branch for If on rFlagsRegUCF flags where the condition
// (cmpOpUCF2) is eq or ne and the parity flag must also be consulted, so two
// jumps are emitted:
//   ne: branch to the target if parity is set OR the values differ;
//   eq: skip the branch if parity is set, otherwise branch on equal.
// Any other condition code is a matcher error (ShouldNotReachHere).
// No size() is declared because the two shapes differ in length.
13140 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
13141   match(If cop cmp);
13142   effect(USE labl);
13143 
13144   ins_cost(200);
13145   format %{ $$template
13146     if ($cop$$cmpcode == Assembler::notEqual) {
13147       $$emit$$"jp,u    $labl\n\t"
13148       $$emit$$"j$cop,u   $labl"
13149     } else {
13150       $$emit$$"jp,u    done\n\t"
13151       $$emit$$"j$cop,u   $labl\n\t"
13152       $$emit$$"done:"
13153     }
13154   %}
13155   ins_encode %{
13156     Label* l = $labl$$label;
13157     if ($cop$$cmpcode == Assembler::notEqual) {
13158       __ jcc(Assembler::parity, *l, false);
13159       __ jcc(Assembler::notEqual, *l, false);
13160     } else if ($cop$$cmpcode == Assembler::equal) {
13161       Label done;
13162       __ jccb(Assembler::parity, done);
13163       __ jcc(Assembler::equal, *l, false);
13164       __ bind(done);
13165     } else {
13166        ShouldNotReachHere();
13167     }
13168   %}
13169   ins_pipe(pipe_jcc);
13170 %}
13171 
13172 // ============================================================================
13173 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
13174 // superklass array for an instance of the superklass.  Set a hidden
13175 // internal cache on a hit (cache is checked with exposed code in
13176 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13177 // encoding ALSO sets flags.
13178 
13179 instruct partialSubtypeCheck(rdi_RegP result,
13180                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
13181                              rFlagsReg cr)
13182 %{
13183   match(Set result (PartialSubtypeCheck sub super));
13184   effect(KILL rcx, KILL cr);
13185   // Value-producing form: $result is zeroed on a hit (see opcode(0x1) below); rcx and flags are clobbered.
13186   ins_cost(1100);  // slightly larger than the next version
13187   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
13188             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
13189             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
13190             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
13191             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
13192             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
13193             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
13194             "miss:\t" %}
13195 
13196   opcode(0x1); // Force a XOR of RDI
13197   ins_encode(enc_PartialSubtypeCheck());
13198   ins_pipe(pipe_slow);
13199 %}
13200 
13201 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
13202                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
13203                                      immP0 zero,
13204                                      rdi_RegP result)
13205 %{
13206   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
13207   effect(KILL rcx, KILL result);
13208   // Flags-only form: the check is matched together with the compare against $zero, so rdi is only scratch.
13209   ins_cost(1000);
13210   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
13211             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
13212             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
13213             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
13214             "jne,s   miss\t\t# Missed: flags nz\n\t"
13215             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
13216     "miss:\t" %}
13217 
13218   opcode(0x0); // No need to XOR RDI: only the flags are consumed by this rule
13219   ins_encode(enc_PartialSubtypeCheck());
13220   ins_pipe(pipe_slow);
13221 %}
13222 
13223 // ============================================================================
13224 // Branch Instructions -- short offset versions
13225 //
13226 // These instructions are used to replace jumps of a long offset (the default
13227 // match) with jumps of a shorter offset.  These instructions are all tagged
13228 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13229 // match rules in general matching.  Instead, the ADLC generates a conversion
13230 // method in the MachNode which can be used to do in-place replacement of the
13231 // long variant with the shorter variant.  The compiler will determine if a
13232 // branch can be taken by the is_short_branch_offset() predicate in the machine
13233 // specific code section of the file.
13234 
13235 // Jump Direct, short form (declared size: 2 bytes) - Label defines a relative address from JMP+1
13236 instruct jmpDir_short(label labl) %{
13237   match(Goto);
13238   effect(USE labl);
13239 
13240   ins_cost(300);
13241   format %{ "jmp,s   $labl" %}
13242   size(2);
13243   ins_encode %{
13244     Label& target = *($labl$$label);
13245     __ jmpb(target);
13246   %}
13247   ins_pipe(pipe_jmp);
13248   ins_short_branch(1);
13249 %}
13250 
13251 // Jump Direct Conditional, short form - Label defines a relative address from Jcc+1
13252 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
13253   match(If cop cr);
13254   effect(USE labl);
13255 
13256   ins_cost(300);
13257   format %{ "j$cop,s   $labl" %}
13258   size(2);
13259   ins_encode %{
13260     const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
13261     __ jccb(cc, *($labl$$label));
13262   %}
13263   ins_pipe(pipe_jcc);
13264   ins_short_branch(1);
13265 %}
13266 
13267 // Counted loop end branch, short form - Label defines a relative address from Jcc+1
13268 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
13269   match(CountedLoopEnd cop cr);
13270   effect(USE labl);
13271 
13272   ins_cost(300);
13273   format %{ "j$cop,s   $labl\t# loop end" %}
13274   size(2);
13275   ins_encode %{
13276     const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
13277     __ jccb(cc, *($labl$$label));
13278   %}
13279   ins_pipe(pipe_jcc);
13280   ins_short_branch(1);
13281 %}
13282 
13283 // Jump Direct Conditional - unsigned comparison, short form (declared size: 2 bytes)
13284 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13285   match(If cop cmp);
13286   effect(USE labl);
13287 
13288   ins_cost(300);
13289   format %{ "j$cop,us  $labl" %}
13290   size(2);
13291   ins_encode %{
13292     const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
13293     __ jccb(cc, *($labl$$label));
13294   %}
13295   ins_pipe(pipe_jcc);
13296   ins_short_branch(1);
13297 %}
13298 
13299 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
13300   match(If cop cmp);
13301   effect(USE labl);
13302   // Short-form (2-byte) conditional jump for the UCF operand/flags classes.
13303   ins_cost(300);
13304   format %{ "j$cop,us  $labl" %}
13305   size(2);
13306   ins_encode %{
13307     const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
13308     __ jccb(cc, *($labl$$label));
13309   %}
13310   ins_pipe(pipe_jcc);
13311   ins_short_branch(1);
13312 %}
13313 
13314 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
13315   match(If cop cmp);
13316   effect(USE labl);
13317   // Short-form eq/ne branch that also tests the parity flag: two 2-byte jumps, hence size(4).
13318   ins_cost(300);
13319   format %{ $$template
13320     if ($cop$$cmpcode == Assembler::notEqual) {
13321       $$emit$$"jp,u,s  $labl\n\t"
13322       $$emit$$"j$cop,u,s  $labl"
13323     } else {
13324       $$emit$$"jp,u,s  done\n\t"
13325       $$emit$$"j$cop,u,s  $labl\n\t"
13326       $$emit$$"done:"
13327     }
13328   %}
13329   size(4);
13330   ins_encode %{
13331     Label& target = *($labl$$label);
13332     if ($cop$$cmpcode == Assembler::equal) {
13333       Label done;
13334       __ jccb(Assembler::parity, done);     // parity set: skip the 'equal' branch
13335       __ jccb(Assembler::equal, target);
13336       __ bind(done);
13337     } else if ($cop$$cmpcode == Assembler::notEqual) {
13338       __ jccb(Assembler::parity, target);   // parity set: branch to the target as well
13339       __ jccb(Assembler::notEqual, target);
13340     } else {
13341       ShouldNotReachHere();
13342     }
13343   %}
13344   ins_pipe(pipe_jcc);
13345   ins_short_branch(1);
13346 %}
13347 
13348 // ============================================================================
13349 // inlined locking and unlocking
13350 
13351 instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
13352   predicate(Compile::current()->use_rtm()); // selected only when the current compilation uses RTM
13353   match(Set cr (FastLock object box));
13354   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13355   ins_cost(300);
13356   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13357   ins_encode %{
13358     Method* const compiled_method = (Method*)(ra_->C->method()->constant_encoding());
13359     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13360                  $scr$$Register, $cx1$$Register, $cx2$$Register, r15_thread,
13361                  _rtm_counters, _stack_rtm_counters, compiled_method->method_data(),
13362                  true, ra_->C->profile_rtm());
13363   %}
13364   ins_pipe(pipe_slow);
13365 %}
13366 
13367 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
13368   predicate(!Compile::current()->use_rtm()); // complement of the cmpFastLockRTM predicate
13369   match(Set cr (FastLock object box));
13370   effect(TEMP tmp, TEMP scr, USE_KILL box);
13371   ins_cost(300);
13372   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
13373   ins_encode %{
13374     // Same helper as the RTM rule, but with no extra temps (noreg), no counters (nullptr) and both flags false.
13375     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13376                  $scr$$Register, noreg, noreg, r15_thread, nullptr, nullptr, nullptr, false, false);
13377   %}
13378   ins_pipe(pipe_slow);
13379 
13380 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
13381   match(Set cr (FastUnlock object box)); // result is delivered in the flags register
13382   effect(TEMP tmp, USE_KILL box);
13383   ins_cost(300);
13384   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
13385   ins_encode %{
13386     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm()); // one rule serves both RTM and non-RTM compilations
13387   %}
13388   ins_pipe(pipe_slow);
13389 %}
13390 
13391 
13392 // ============================================================================
13393 // Safepoint Instructions
13394 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
13395 %{
13396   match(SafePoint poll);
13397   effect(KILL cr, USE poll);
13398   // The poll is a testl that reads the word addressed by $poll; only the flags are written (KILL cr).
13399   format %{ "testl   rax, [$poll]\t"
13400             "# Safepoint: poll for GC" %}
13401   ins_cost(125);
13402   size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13403   ins_encode %{
13404     __ relocate(relocInfo::poll_type);           // poll_type relocation is emitted right before the testl
13405     address pre_pc = __ pc();                    // address of the poll instruction, checked by the assert below
13406     __ testl(rax, Address($poll$$Register, 0));
13407     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
13408   %}
13409   ins_pipe(ialu_reg_mem);
13410 %}
13411 
13412 instruct mask_all_evexL(kReg dst, rRegL src) %{
13413   match(Set dst (MaskAll src));
13414   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
13415   ins_encode %{
13416     // The mask length is the vector length the Matcher reports for this node.
13417     __ vector_maskall_operation($dst$$KRegister, $src$$Register, Matcher::vector_length(this));
13418   %}
13419   ins_pipe( pipe_slow );
13420 %}
13421 
13422 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
13423   predicate(Matcher::vector_length(n) > 32);
13424   match(Set dst (MaskAll src));
13425   effect(TEMP tmp);
13426   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
13427   ins_encode %{
13428     Register widened = $tmp$$Register;
13429     __ movslq(widened, $src$$Register); // int source is widened into the long temp first
13430     __ vector_maskall_operation($dst$$KRegister, widened, Matcher::vector_length(this));
13431   %}
13432   ins_pipe( pipe_slow );
13433 %}
13434 
13435 // ============================================================================
13436 // Procedure Call/Return Instructions
13437 // Call Java Static Instruction (matches CallStaticJava)
13438 // Note: If this code changes, the corresponding ret_addr_offset() and
13439 //       compute_padding() functions will have to be adjusted.
13440 instruct CallStaticJavaDirect(method meth) %{
13441   match(CallStaticJava);
13442   effect(USE meth);
13443   // No inline code here: the encoding is composed from the three enc classes named in ins_encode.
13444   ins_cost(300);
13445   format %{ "call,static " %}
13446   opcode(0xE8); /* E8 cd */
13447   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
13448   ins_pipe(pipe_slow);
13449   ins_alignment(4); // NOTE(review): presumably related to the compute_padding() note above - confirm
13450 %}
13451 
13452 // Call Java Dynamic Instruction (matches CallDynamicJava)
13453 // Note: If this code changes, the corresponding ret_addr_offset() and
13454 //       compute_padding() functions will have to be adjusted.
13455 instruct CallDynamicJavaDirect(method meth)
13456 %{
13457   match(CallDynamicJava);
13458   effect(USE meth);
13459   // Per the format text, a movq of Universe::non_oop_word() into rax precedes the call.
13460   ins_cost(300);
13461   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
13462             "call,dynamic " %}
13463   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
13464   ins_pipe(pipe_slow);
13465   ins_alignment(4); // NOTE(review): presumably related to the compute_padding() note above - confirm
13466 %}
13467 
13468 // Call Runtime Instruction (matches CallRuntime)
13469 instruct CallRuntimeDirect(method meth)
13470 %{
13471   match(CallRuntime);
13472   effect(USE meth);
13473   // Encoded as clear_avx followed by Java_To_Runtime; unlike the Java call rules, no call_epilog is listed.
13474   ins_cost(300);
13475   format %{ "call,runtime " %}
13476   ins_encode(clear_avx, Java_To_Runtime(meth));
13477   ins_pipe(pipe_slow);
13478 %}
13479 
13480 // Call runtime without safepoint (matches CallLeaf)
13481 instruct CallLeafDirect(method meth)
13482 %{
13483   match(CallLeaf);
13484   effect(USE meth);
13485   // Uses the same enc classes as CallRuntimeDirect; only the matched ideal node and format text differ.
13486   ins_cost(300);
13487   format %{ "call_leaf,runtime " %}
13488   ins_encode(clear_avx, Java_To_Runtime(meth));
13489   ins_pipe(pipe_slow);
13490 %}
13491 
13492 // Call runtime without safepoint and with vector arguments (matches CallLeafVector)
13493 instruct CallLeafDirectVector(method meth)
13494 %{
13495   match(CallLeafVector);
13496   effect(USE meth);
13497   // Unlike the other runtime call rules here, clear_avx is not part of the encoding.
13498   ins_cost(300);
13499   format %{ "call_leaf,vector " %}
13500   ins_encode(Java_To_Runtime(meth)); // NOTE(review): presumably omitted to keep vector argument registers intact - confirm
13501   ins_pipe(pipe_slow);
13502 %}
13503 
13504 // Call runtime without safepoint (matches CallLeafNoFP)
13505 instruct CallLeafNoFPDirect(method meth)
13506 %{
13507   match(CallLeafNoFP);
13508   effect(USE meth);
13509   // Uses the same enc classes as CallLeafDirect; only the matched ideal node and format text differ.
13510   ins_cost(300);
13511   format %{ "call_leaf_nofp,runtime " %}
13512   ins_encode(clear_avx, Java_To_Runtime(meth));
13513   ins_pipe(pipe_slow);
13514 %}
13515 
13516 // Return Instruction
13517 // Remove the return address & jump to it.
13518 // Notice: We always emit a nop after a ret to make sure there is room
13519 // for safepoint patching
13520 instruct Ret()
13521 %{
13522   match(Return);
13523   // NOTE(review): the encoding below emits only `ret`; no nop is emitted by this rule - the notice above may be stale, confirm.
13524   format %{ "ret" %}
13525   ins_encode %{
13526     __ ret(0);
13527   %}
13528   ins_pipe(pipe_jmp);
13529 %}
13530 
13531 // Tail Call; Jump from runtime stub to Java code.
13532 // Also known as an 'interprocedural jump'.
13533 // Target of jump will eventually return to caller.
13534 // TailJump below removes the return address.
13535 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
13536 %{
13537   match(TailCall jump_target method_ptr);
13538   // method_ptr is pinned to rbx by its operand class and is not referenced by the encoding itself.
13539   ins_cost(300);
13540   format %{ "jmp     $jump_target\t# rbx holds method" %}
13541   ins_encode %{
13542     __ jmp($jump_target$$Register); // indirect jump; the return address is left in place (see comment above)
13543   %}
13544   ins_pipe(pipe_jmp);
13545 %}
13546 
13547 // Tail Jump: discard the return address, then jump to the target.
13548 // (TailCall above keeps the return address instead.)
13549 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop) %{
13550   match(TailJump jump_target ex_oop);
13551 
13552   ins_cost(300);
13553   format %{ "popq    rdx\t# pop return address\n\t"
13554             "jmp     $jump_target" %}
13555   ins_encode %{
13556     Register discard = as_Register(RDX_enc);  // rdx, as shown in the format text
13557     __ popq(discard);                         // pop the return address
13558     __ jmp($jump_target$$Register);
13559   %}
13560   ins_pipe(pipe_jmp);
13561 %}
13562 
13563 // Create exception oop: created by stack-crawling runtime code.
13564 // Created exception is now available to this handler, and is setup
13565 // just prior to jumping to this handler.  No code emitted.
13566 instruct CreateException(rax_RegP ex_oop)
13567 %{
13568   match(Set ex_oop (CreateEx));
13569   // size(0) together with the empty ins_encode() below: this rule contributes no bytes.
13570   size(0);
13571   // the format text is only a listing annotation
13572   format %{ "# exception oop is in rax; no code emitted" %}
13573   ins_encode();
13574   ins_pipe(empty);
13575 %}
13576 
13577 // Rethrow exception:
13578 // The exception oop will come in the first argument position.
13579 // Then JUMP (not call) to the rethrow stub code.
13580 instruct RethrowException()
13581 %{
13582   match(Rethrow);
13583   // No operands: the jump itself is produced by the enc_rethrow enc class.
13584   // listing text for the emitted jump
13585   format %{ "jmp     rethrow_stub" %}
13586   ins_encode(enc_rethrow);
13587   ins_pipe(pipe_jmp);
13588 %}
13589 
13590 // ============================================================================
13591 // This name is KNOWN by the ADLC and cannot be changed.
13592 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13593 // for this guy.
13594 instruct tlsLoadP(r15_RegP dst) %{
13595   match(Set dst (ThreadLocal));
13596   effect(DEF dst);
13597   // dst is pinned to r15 by its operand class, so nothing has to be emitted (size(0), empty encoding).
13598   size(0);
13599   format %{ "# TLS is in R15" %}
13600   ins_encode( /*empty encoding*/ );
13601   ins_pipe(ialu_reg_reg);
13602 %}
13603 
13604 
13605 //----------PEEPHOLE RULES-----------------------------------------------------
13606 // These must follow all instruction definitions as they use the names
13607 // defined in the instructions definitions.
13608 //
13609 // peeppredicate ( rule_predicate );
13610 // // the peephole rule is ignored unless this predicate holds
13611 //
13612 // peepmatch ( root_instr_name [preceding_instruction]* );
13613 //
13614 // peepprocedure ( procedure_name );
13615 // // provide a procedure name to perform the optimization; the procedure should
13616 // // reside in the architecture-dependent peephole file. The method has the
13617 // // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
13618 // // with the arguments being the basic block, the current node index inside the
13619 // // block, the register allocator, a function that, when invoked, returns a new
13620 // // node of the kind defined in peepreplace, and the rules of the nodes appearing
13621 // // in the corresponding peepmatch. The function returns true if successful,
13622 // // else false
13623 //
13624 // peepconstraint %{
13625 // (instruction_number.operand_name relational_op instruction_number.operand_name
13626 //  [, ...] );
13627 // // instruction numbers are zero-based using left to right order in peepmatch
13628 //
13629 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13630 // // provide an instruction_number.operand_name for each operand that appears
13631 // // in the replacement instruction's match rule
13632 //
13633 // ---------VM FLAGS---------------------------------------------------------
13634 //
13635 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13636 //
13637 // Each peephole rule is given an identifying number starting with zero and
13638 // increasing by one in the order seen by the parser.  An individual peephole
13639 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13640 // on the command-line.
13641 //
13642 // ---------CURRENT LIMITATIONS----------------------------------------------
13643 //
13644 // Only transformations inside a basic block (do we need more for peephole)
13645 //
13646 // ---------EXAMPLE----------------------------------------------------------
13647 //
13648 // // pertinent parts of existing instructions in architecture description
13649 // instruct movI(rRegI dst, rRegI src)
13650 // %{
13651 //   match(Set dst (CopyI src));
13652 // %}
13653 //
13654 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
13655 // %{
13656 //   match(Set dst (AddI dst src));
13657 //   effect(KILL cr);
13658 // %}
13659 //
13660 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
13661 // %{
13662 //   match(Set dst (AddI dst src));
13663 // %}
13664 //
13665 // 1. Simple replacement
13666 // - Only match adjacent instructions in same basic block
13667 // - Only equality constraints
13668 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
13669 // - Only one replacement instruction
13670 //
13671 // // Change (inc mov) to lea
13672 // peephole %{
13673 //   // lea should only be emitted when beneficial
13674 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13675 //   // increment preceded by register-register move
13676 //   peepmatch ( incI_rReg movI );
13677 //   // require that the destination register of the increment
13678 //   // match the destination register of the move
13679 //   peepconstraint ( 0.dst == 1.dst );
13680 //   // construct a replacement instruction that sets
13681 //   // the destination to ( move's source register + one )
13682 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13683 // %}
13684 //
13685 // 2. Procedural replacement
13686 // - More flexible finding relevant nodes
13687 // - More flexible constraints
13688 // - More flexible transformations
13689 // - May utilise architecture-dependent API more effectively
13690 // - Currently only one replacement instruction due to adlc parsing capabilities
13691 //
13692 // // Change (inc mov) to lea
13693 // peephole %{
13694 //   // lea should only be emitted when beneficial
13695 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13696 //   // the rule numbers of these nodes inside are passed into the function below
13697 //   peepmatch ( incI_rReg movI );
13698 //   // the method that takes the responsibility of transformation
13699 //   peepprocedure ( inc_mov_to_lea );
13700 //   // the replacement is a leaI_rReg_immI; a lambda that creates this node
13701 //   // when invoked is passed into the function above
13702 //   peepreplace ( leaI_rReg_immI() );
13703 // %}
13704 
13705 // These instructions are not matched by the matcher but are used by the peephole rules
13706 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
13707 %{
13708   predicate(false);
13709   match(Set dst (AddI src1 src2));
13710   format %{ "leal    $dst, [$src1 + $src2]" %}
13711   ins_encode %{
13712     Register base  = $src1$$Register;
13713     Register index = $src2$$Register;
13714     if (base == rbp || base == r13) {
13715       // rbp/r13 are kept out of the base position: use the other source as base instead
13716       assert(index != rbp && index != r13, "");
13717       Register swapped = base; base = index; index = swapped;
13718     }
13719     // With scale times_1 the sum does not depend on which source is base and which is index.
13720     __ leal($dst$$Register, Address(base, index, Address::times_1));
13721   %}
13722   ins_pipe(ialu_reg_reg);
13723 %}
13724 
13725 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
13726 %{
13727   predicate(false); // never selected by the matcher; only created through peepreplace
13728   match(Set dst (AddI src1 src2));
13729   format %{ "leal    $dst, [$src1 + $src2]" %}
13730   ins_encode %{
13731     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant)); // base register + immediate displacement
13732   %}
13733   ins_pipe(ialu_reg_reg);
13734 %}
13735 
13736 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
13737 %{
13738   predicate(false);
13739   match(Set dst (LShiftI src shift));
13740   format %{ "leal    $dst, [$src << $shift]" %}
13741   ins_encode %{
13742     Register reg = $src$$Register;
13743     const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
13744     // A times_2 scale on a register other than rbp/r13 is emitted as [reg + reg]; otherwise as a scaled index.
13745     const bool as_self_add = (factor == Address::times_2) && reg != rbp && reg != r13;
13746     const Address ea = as_self_add ? Address(reg, reg, Address::times_1)
13747                                    : Address(noreg, reg, factor);
13748     __ leal($dst$$Register, ea);
13749   %}
13750   ins_pipe(ialu_reg_reg);
13751 %}
13752 
13753 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
13754 %{
13755   predicate(false);
13756   match(Set dst (AddL src1 src2));
13757   format %{ "leaq    $dst, [$src1 + $src2]" %}
13758   ins_encode %{
13759     Register base  = $src1$$Register;
13760     Register index = $src2$$Register;
13761     if (base == rbp || base == r13) {
13762       // rbp/r13 are kept out of the base position: use the other source as base instead
13763       assert(index != rbp && index != r13, "");
13764       Register swapped = base; base = index; index = swapped;
13765     }
13766     // With scale times_1 the sum does not depend on which source is base and which is index.
13767     __ leaq($dst$$Register, Address(base, index, Address::times_1));
13768   %}
13769   ins_pipe(ialu_reg_reg);
13770 %}
13771 
13772 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
13773 %{
13774   predicate(false); // never selected by the matcher; only created through peepreplace
13775   match(Set dst (AddL src1 src2));
13776   format %{ "leaq    $dst, [$src1 + $src2]" %}
13777   ins_encode %{
13778     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant)); // base register + immediate displacement
13779   %}
13780   ins_pipe(ialu_reg_reg);
13781 %}
13782 
13783 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
13784 %{
13785   predicate(false);
13786   match(Set dst (LShiftL src shift));
13787   format %{ "leaq    $dst, [$src << $shift]" %}
13788   ins_encode %{
13789     Register reg = $src$$Register;
13790     const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
13791     // A times_2 scale on a register other than rbp/r13 is emitted as [reg + reg]; otherwise as a scaled index.
13792     const bool as_self_add = (factor == Address::times_2) && reg != rbp && reg != r13;
13793     const Address ea = as_self_add ? Address(reg, reg, Address::times_1)
13794                                    : Address(noreg, reg, factor);
13795     __ leaq($dst$$Register, ea);
13796   %}
13797   ins_pipe(ialu_reg_reg);
13798 %}
13799 
13800 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
13801 // sal}) with lea instructions. The {add, sal} rules are beneficial in
13802 // processors with at least partial ALU support for lea
13803 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
13804 // beneficial for processors with full ALU support
13805 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
13806 
13807 peephole
13808 %{
13809   peeppredicate(VM_Version::supports_fast_2op_lea());  // rule is active only when this holds
13810   peepmatch (addI_rReg);                               // pattern root: the int add
13811   peepprocedure (lea_coalesce_reg);                    // procedure that performs the transformation
13812   peepreplace (leaI_rReg_rReg_peep());                 // replacement: leal dst, [src1 + src2]
13813 %}
13814 
13815 peephole
13816 %{
13817   peeppredicate(VM_Version::supports_fast_2op_lea());  // rule is active only when this holds
13818   peepmatch (addI_rReg_imm);                           // pattern root: the int add with an immediate
13819   peepprocedure (lea_coalesce_imm);                    // procedure that performs the transformation
13820   peepreplace (leaI_rReg_immI_peep());                 // replacement: leal dst, [src1 + imm]
13821 %}
13822 
13823 peephole
13824 %{
13825   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13826                 VM_Version::is_intel_cascade_lake());  // stricter gate used for the inc/dec rules
13827   peepmatch (incI_rReg);                               // pattern root: the int increment
13828   peepprocedure (lea_coalesce_imm);                    // procedure that performs the transformation
13829   peepreplace (leaI_rReg_immI_peep());                 // replacement: leal dst, [src1 + imm]
13830 %}
13831 
13832 peephole
13833 %{
13834   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13835                 VM_Version::is_intel_cascade_lake());  // stricter gate used for the inc/dec rules
13836   peepmatch (decI_rReg);                               // pattern root: the int decrement
13837   peepprocedure (lea_coalesce_imm);                    // procedure that performs the transformation
13838   peepreplace (leaI_rReg_immI_peep());                 // replacement: leal dst, [src1 + imm]
13839 %}
13840 
13841 peephole
13842 %{
13843   peeppredicate(VM_Version::supports_fast_2op_lea());  // rule is active only when this holds
13844   peepmatch (salI_rReg_immI2);                         // pattern root: the int left shift by an immI2
13845   peepprocedure (lea_coalesce_imm);                    // procedure that performs the transformation
13846   peepreplace (leaI_rReg_immI2_peep());                // replacement: leal dst, [src << shift]
13847 %}
13848 
13849 peephole
13850 %{
13851   peeppredicate(VM_Version::supports_fast_2op_lea());  // rule is active only when this holds
13852   peepmatch (addL_rReg);                               // pattern root: the long add
13853   peepprocedure (lea_coalesce_reg);                    // procedure that performs the transformation
13854   peepreplace (leaL_rReg_rReg_peep());                 // replacement: leaq dst, [src1 + src2]
13855 %}
13856 
13857 peephole
13858 %{
13859   peeppredicate(VM_Version::supports_fast_2op_lea());  // rule is active only when this holds
13860   peepmatch (addL_rReg_imm);                           // pattern root: the long add with an immediate
13861   peepprocedure (lea_coalesce_imm);                    // procedure that performs the transformation
13862   peepreplace (leaL_rReg_immL32_peep());               // replacement: leaq dst, [src1 + imm]
13863 %}
13864 
13865 peephole
13866 %{
13867   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13868                 VM_Version::is_intel_cascade_lake());  // stricter gate used for the inc/dec rules
13869   peepmatch (incL_rReg);                               // pattern root: the long increment
13870   peepprocedure (lea_coalesce_imm);                    // procedure that performs the transformation
13871   peepreplace (leaL_rReg_immL32_peep());               // replacement: leaq dst, [src1 + imm]
13872 %}
13873 
13874 peephole
13875 %{
13876   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13877                 VM_Version::is_intel_cascade_lake());  // stricter gate used for the inc/dec rules
13878   peepmatch (decL_rReg);                               // pattern root: the long decrement
13879   peepprocedure (lea_coalesce_imm);                    // procedure that performs the transformation
13880   peepreplace (leaL_rReg_immL32_peep());               // replacement: leaq dst, [src1 + imm]
13881 %}
13882 
13883 peephole
13884 %{
13885   peeppredicate(VM_Version::supports_fast_2op_lea());  // rule is active only when this holds
13886   peepmatch (salL_rReg_immI2);                         // pattern root: the long left shift by an immI2
13887   peepprocedure (lea_coalesce_imm);                    // procedure that performs the transformation
13888   peepreplace (leaL_rReg_immI2_peep());                // replacement: leaq dst, [src << shift]
13889 %}
13890 
13891 //----------SMARTSPILL RULES---------------------------------------------------
13892 // These must follow all instruction definitions as they use the names
13893 // defined in the instructions definitions.