1 //
    2 // Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
// Each 64-bit general register is described by a pair of reg_defs that
// share one hardware encoding: NAME is bound to the register's VMReg and
// NAME_H to the VMReg slot that follows it (->next()).
// Columns: (Java save type, C convention save type, ideal type, encoding,
// VMReg expression).
reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

// RBX: save-on-call for Java code, save-on-entry in the C convention.
reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// RSP is marked no-save in both conventions.
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

// RSI and RDI differ only in the C convention column: save-on-entry when
// _WIN64 is defined, save-on-call otherwise.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

// R8-R11: save-on-call in both conventions.
reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15: save-on-call for Java code, save-on-entry in the C convention.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 
  132 // Floating Point Registers
  133 
  134 // Specify priority of register selection within phases of register
  135 // allocation.  Highest priority is first.  A useful heuristic is to
  136 // give registers a low priority when they are required by machine
  137 // instructions, like EAX and EDX on I486, and choose no-save registers
  138 // before save-on-call, & save-on-call before save-on-entry.  Registers
  139 // which participate in fixed calling sequences should come last.
  140 // Registers which are used as pairs must fall on an even boundary.
  141 
// chunk0 lists the general registers in allocation priority order, highest
// priority first.  Every register is immediately followed by its _H half so
// that each pair starts on an even position; RSP comes last.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);
  158 
  159 
  160 //----------Architecture Description Register Classes--------------------------
  161 // Several register classes are automatically defined based upon information in
  162 // this architecture description.
  163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  164 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  165 //
  166 
// Empty register class: it lists no registers at all.
reg_class no_reg();
  169 
// Class for all pointer/long registers: every general register together
// with its _H half, RSP and R15 included.  adlc generates _ALL_REG_mask
// from this class and reg_mask_init() derives the run-time pointer and
// long masks from it.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);
  187 
  188 // Class for all int registers
  189 reg_class all_int_reg(RAX
  190                       RDX,
  191                       RBP,
  192                       RDI,
  193                       RSI,
  194                       RCX,
  195                       RBX,
  196                       R8,
  197                       R9,
  198                       R10,
  199                       R11,
  200                       R12,
  201                       R13,
  202                       R14);
  203 
// The classes below return masks that reg_mask_init() computes from the
// adlc-generated all_reg / all_int_reg masks.  Every one of them also drops
// RBP when PreserveFramePointer is set and R12 when need_r12_heapbase()
// returns true.

// Class for all pointer registers (RSP and R15 included)
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP and R15)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP, R15 and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX, RSP and R15)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, RSP and R15)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP and R15); same contents as
// ptr_reg
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX, RSP and R15)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX, RSP and R15)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP, R13, RSP and R15)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP and R15, which all_int_reg
// does not list)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, RSP and R15)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX, RSP and R15)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP, R13, RSP and R15)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
  268 
// Pointer and long singletons list both halves of the register; int
// singletons list only the low half.

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);
  313 
  314 // Singleton class for instruction pointer
  315 // reg_class ip_reg(RIP);
  316 
  317 %}
  318 
  319 //----------SOURCE BLOCK-------------------------------------------------------
  320 // This is a block of C++ code which provides values, functions, and
  321 // definitions necessary in the rest of the architecture description
  322 
  323 source_hpp %{
  324 
  325 #include "peephole_x86_64.hpp"
  326 
  327 %}
  328 
  329 // Register masks
  330 source_hpp %{
  331 
// Run-time register masks.  They are defined in the source block and, with
// the exception of _FLOAT_REG_mask, are the masks returned by the
// dynamically defined reg_class entries in the register block.
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// Register masks combined (OR) with the stack-slot mask; exposed through
// the accessor functions that follow.
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;
  350 
  351 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
  352 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
  353 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
  354 
  355 %}
  356 
  357 source %{
// Short names for the Assembler operand-format constants that are passed as
// the 'format' argument when a relocation is recorded.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// "__ foo(...)" expands to "_masm.foo(...)", so any function using it needs
// a MacroAssembler named _masm in scope.
#define __ _masm.
  362 
// Storage for the run-time register masks declared in source_hpp.  They are
// default-constructed here and given their contents by reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
  380 
  381 static bool need_r12_heapbase() {
  382   return UseCompressedOops;
  383 }
  384 
  385 void reg_mask_init() {
  386   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
  387   // We derive a number of subsets from it.
  388   _ANY_REG_mask = _ALL_REG_mask;
  389 
  390   if (PreserveFramePointer) {
  391     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  392     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  393   }
  394   if (need_r12_heapbase()) {
  395     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  396     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
  397   }
  398 
  399   _PTR_REG_mask = _ANY_REG_mask;
  400   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
  401   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
  402   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
  403   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
  404 
  405   _STACK_OR_PTR_REG_mask = _PTR_REG_mask;
  406   _STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  407 
  408   _PTR_REG_NO_RBP_mask = _PTR_REG_mask;
  409   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  410   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  411 
  412   _PTR_NO_RAX_REG_mask = _PTR_REG_mask;
  413   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  414   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  415 
  416   _PTR_NO_RAX_RBX_REG_mask = _PTR_NO_RAX_REG_mask;
  417   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  418   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
  419 
  420   _LONG_REG_mask = _PTR_REG_mask;
  421   _STACK_OR_LONG_REG_mask = _LONG_REG_mask;
  422   _STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  423 
  424   _LONG_NO_RAX_RDX_REG_mask = _LONG_REG_mask;
  425   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  426   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  427   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  428   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
  429 
  430   _LONG_NO_RCX_REG_mask = _LONG_REG_mask;
  431   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  432   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
  433 
  434   _LONG_NO_RBP_R13_REG_mask = _LONG_REG_mask;
  435   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  436   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  437   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  438   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
  439 
  440   _INT_REG_mask = _ALL_INT_REG_mask;
  441   if (PreserveFramePointer) {
  442     _INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  443   }
  444   if (need_r12_heapbase()) {
  445     _INT_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  446   }
  447 
  448   _STACK_OR_INT_REG_mask = _INT_REG_mask;
  449   _STACK_OR_INT_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  450 
  451   _INT_NO_RAX_RDX_REG_mask = _INT_REG_mask;
  452   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  453   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  454 
  455   _INT_NO_RCX_REG_mask = _INT_REG_mask;
  456   _INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  457 
  458   _INT_NO_RBP_R13_REG_mask = _INT_REG_mask;
  459   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  460   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  461 
  462   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  463   // from the float_reg_legacy/float_reg_evex register class.
  464   _FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
  465 }
  466 
  467 static bool generate_vzeroupper(Compile* C) {
  468   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
  469 }
  470 
  471 static int clear_avx_size() {
  472   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {
  493   if (_entry_point == NULL) {
  494     // CallLeafNoFPInDirect
  495     return 3; // callq (register)
  496   }
  497   int offset = 13; // movq r10,#addr; callq (r10)
  498   if (this->ideal_Opcode() != Op_CallLeafVector) {
  499     offset += clear_avx_size();
  500   }
  501   return offset;
  502 }
  503 
  504 //
  505 // Compute padding required for nodes which need alignment
  506 //
  507 
  508 // The address of the call instruction needs to be 4-byte aligned to
  509 // ensure that it does not span a cache line so that it can be patched.
  510 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  511 {
  512   current_offset += clear_avx_size(); // skip vzeroupper
  513   current_offset += 1; // skip call opcode byte
  514   return align_up(current_offset, alignment_required()) - current_offset;
  515 }
  516 
  517 // The address of the call instruction needs to be 4-byte aligned to
  518 // ensure that it does not span a cache line so that it can be patched.
  519 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  520 {
  521   current_offset += clear_avx_size(); // skip vzeroupper
  522   current_offset += 11; // skip movq instruction + call opcode byte
  523   return align_up(current_offset, alignment_required()) - current_offset;
  524 }
  525 
  526 // EMIT_RM()
  527 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  528   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
  529   cbuf.insts()->emit_int8(c);
  530 }
  531 
  532 // EMIT_CC()
  533 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  534   unsigned char c = (unsigned char) (f1 | f2);
  535   cbuf.insts()->emit_int8(c);
  536 }
  537 
  538 // EMIT_OPCODE()
  539 void emit_opcode(CodeBuffer &cbuf, int code) {
  540   cbuf.insts()->emit_int8((unsigned char) code);
  541 }
  542 
  543 // EMIT_OPCODE() w/ relocation information
  544 void emit_opcode(CodeBuffer &cbuf,
  545                  int code, relocInfo::relocType reloc, int offset, int format)
  546 {
  547   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
  548   emit_opcode(cbuf, code);
  549 }
  550 
  551 // EMIT_D8()
  552 void emit_d8(CodeBuffer &cbuf, int d8) {
  553   cbuf.insts()->emit_int8((unsigned char) d8);
  554 }
  555 
  556 // EMIT_D16()
  557 void emit_d16(CodeBuffer &cbuf, int d16) {
  558   cbuf.insts()->emit_int16(d16);
  559 }
  560 
  561 // EMIT_D32()
  562 void emit_d32(CodeBuffer &cbuf, int d32) {
  563   cbuf.insts()->emit_int32(d32);
  564 }
  565 
  566 // EMIT_D64()
  567 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
  568   cbuf.insts()->emit_int64(d64);
  569 }
  570 
  571 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  572 void emit_d32_reloc(CodeBuffer& cbuf,
  573                     int d32,
  574                     relocInfo::relocType reloc,
  575                     int format)
  576 {
  577   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
  578   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  579   cbuf.insts()->emit_int32(d32);
  580 }
  581 
  582 // emit 32 bit value and construct relocation entry from RelocationHolder
  583 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
  584 #ifdef ASSERT
  585   if (rspec.reloc()->type() == relocInfo::oop_type &&
  586       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
  587     assert(Universe::heap()->is_in((address)(intptr_t)d32), "should be real oop");
  588     assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)), "cannot embed broken oops in code");
  589   }
  590 #endif
  591   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  592   cbuf.insts()->emit_int32(d32);
  593 }
  594 
  595 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
  596   address next_ip = cbuf.insts_end() + 4;
  597   emit_d32_reloc(cbuf, (int) (addr - next_ip),
  598                  external_word_Relocation::spec(addr),
  599                  RELOC_DISP32);
  600 }
  601 
  602 
  603 // emit 64 bit value and construct relocation entry from relocInfo::relocType
  604 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
  605   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  606   cbuf.insts()->emit_int64(d64);
  607 }
  608 
  609 // emit 64 bit value and construct relocation entry from RelocationHolder
  610 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
  611 #ifdef ASSERT
  612   if (rspec.reloc()->type() == relocInfo::oop_type &&
  613       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
  614     assert(Universe::heap()->is_in((address)d64), "should be real oop");
  615     assert(oopDesc::is_oop(cast_to_oop(d64)), "cannot embed broken oops in code");
  616   }
  617 #endif
  618   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  619   cbuf.insts()->emit_int64(d64);
  620 }
  621 
  622 // Access stack slot for load or store
  623 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
  624 {
  625   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
  626   if (-0x80 <= disp && disp < 0x80) {
  627     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
  628     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  629     emit_d8(cbuf, disp);     // Displacement  // R/M byte
  630   } else {
  631     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
  632     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  633     emit_d32(cbuf, disp);     // Displacement // R/M byte
  634   }
  635 }
  636 
  637    // rRegI ereg, memory mem) %{    // emit_reg_mem
  638 void encode_RegMem(CodeBuffer &cbuf,
  639                    int reg,
  640                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
  641 {
  642   assert(disp_reloc == relocInfo::none, "cannot have disp");
  643   int regenc = reg & 7;
  644   int baseenc = base & 7;
  645   int indexenc = index & 7;
  646 
  647   // There is no index & no scale, use form without SIB byte
  648   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
  649     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  650     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  651       emit_rm(cbuf, 0x0, regenc, baseenc); // *
  652     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  653       // If 8-bit displacement, mode 0x1
  654       emit_rm(cbuf, 0x1, regenc, baseenc); // *
  655       emit_d8(cbuf, disp);
  656     } else {
  657       // If 32-bit displacement
  658       if (base == -1) { // Special flag for absolute address
  659         emit_rm(cbuf, 0x0, regenc, 0x5); // *
  660         if (disp_reloc != relocInfo::none) {
  661           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  662         } else {
  663           emit_d32(cbuf, disp);
  664         }
  665       } else {
  666         // Normal base + offset
  667         emit_rm(cbuf, 0x2, regenc, baseenc); // *
  668         if (disp_reloc != relocInfo::none) {
  669           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  670         } else {
  671           emit_d32(cbuf, disp);
  672         }
  673       }
  674     }
  675   } else {
  676     // Else, encode with the SIB byte
  677     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  678     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  679       // If no displacement
  680       emit_rm(cbuf, 0x0, regenc, 0x4); // *
  681       emit_rm(cbuf, scale, indexenc, baseenc);
  682     } else {
  683       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  684         // If 8-bit displacement, mode 0x1
  685         emit_rm(cbuf, 0x1, regenc, 0x4); // *
  686         emit_rm(cbuf, scale, indexenc, baseenc);
  687         emit_d8(cbuf, disp);
  688       } else {
  689         // If 32-bit displacement
  690         if (base == 0x04 ) {
  691           emit_rm(cbuf, 0x2, regenc, 0x4);
  692           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
  693         } else {
  694           emit_rm(cbuf, 0x2, regenc, 0x4);
  695           emit_rm(cbuf, scale, indexenc, baseenc); // *
  696         }
  697         if (disp_reloc != relocInfo::none) {
  698           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  699         } else {
  700           emit_d32(cbuf, disp);
  701         }
  702       }
  703     }
  704   }
  705 }
  706 
  707 // This could be in MacroAssembler but it's fairly C2 specific
  708 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  709   Label exit;
  710   __ jccb(Assembler::noParity, exit);
  711   __ pushf();
  712   //
  713   // comiss/ucomiss instructions set ZF,PF,CF flags and
  714   // zero OF,AF,SF for NaN values.
  715   // Fixup flags by zeroing ZF,PF so that compare of NaN
  716   // values returns 'less than' result (CF is set).
  717   // Leave the rest of flags unchanged.
  718   //
  719   //    7 6 5 4 3 2 1 0
  720   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  721   //    0 0 1 0 1 0 1 1   (0x2B)
  722   //
  723   __ andq(Address(rsp, 0), 0xffffff2b);
  724   __ popf();
  725   __ bind(exit);
  726 }
  727 
  728 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  729   Label done;
  730   __ movl(dst, -1);
  731   __ jcc(Assembler::parity, done);
  732   __ jcc(Assembler::below, done);
  733   __ setb(Assembler::notEqual, dst);
  734   __ movzbl(dst, dst);
  735   __ bind(done);
  736 }
  737 
  738 // Math.min()    # Math.max()
  739 // --------------------------
  740 // ucomis[s/d]   #
  741 // ja   -> b     # a
  742 // jp   -> NaN   # NaN
  743 // jb   -> a     # b
  744 // je            #
  745 // |-jz -> a | b # a & b
  746 // |    -> a     #
  747 void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
  748                      XMMRegister a, XMMRegister b,
  749                      XMMRegister xmmt, Register rt,
  750                      bool min, bool single) {
  751 
  752   Label nan, zero, below, above, done;
  753 
  754   if (single)
  755     __ ucomiss(a, b);
  756   else
  757     __ ucomisd(a, b);
  758 
  759   if (dst->encoding() != (min ? b : a)->encoding())
  760     __ jccb(Assembler::above, above); // CF=0 & ZF=0
  761   else
  762     __ jccb(Assembler::above, done);
  763 
  764   __ jccb(Assembler::parity, nan);  // PF=1
  765   __ jccb(Assembler::below, below); // CF=1
  766 
  767   // equal
  768   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  769   if (single) {
  770     __ ucomiss(a, xmmt);
  771     __ jccb(Assembler::equal, zero);
  772 
  773     __ movflt(dst, a);
  774     __ jmp(done);
  775   }
  776   else {
  777     __ ucomisd(a, xmmt);
  778     __ jccb(Assembler::equal, zero);
  779 
  780     __ movdbl(dst, a);
  781     __ jmp(done);
  782   }
  783 
  784   __ bind(zero);
  785   if (min)
  786     __ vpor(dst, a, b, Assembler::AVX_128bit);
  787   else
  788     __ vpand(dst, a, b, Assembler::AVX_128bit);
  789 
  790   __ jmp(done);
  791 
  792   __ bind(above);
  793   if (single)
  794     __ movflt(dst, min ? b : a);
  795   else
  796     __ movdbl(dst, min ? b : a);
  797 
  798   __ jmp(done);
  799 
  800   __ bind(nan);
  801   if (single) {
  802     __ movl(rt, 0x7fc00000); // Float.NaN
  803     __ movdl(dst, rt);
  804   }
  805   else {
  806     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
  807     __ movdq(dst, rt);
  808   }
  809   __ jmp(done);
  810 
  811   __ bind(below);
  812   if (single)
  813     __ movflt(dst, min ? a : b);
  814   else
  815     __ movdbl(dst, min ? a : b);
  816 
  817   __ bind(done);
  818 }
  819 
  820 //=============================================================================
  821 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  822 
  823 int ConstantTable::calculate_table_base_offset() const {
  824   return 0;  // absolute addressing, no offset
  825 }
  826 
  827 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  828 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  829   ShouldNotReachHere();
  830 }
  831 
  832 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  833   // Empty encoding
  834 }
  835 
  836 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  837   return 0;
  838 }
  839 
  840 #ifndef PRODUCT
  841 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  842   st->print("# MachConstantBaseNode (empty encoding)");
  843 }
  844 #endif
  845 
  846 
  847 //=============================================================================
  848 #ifndef PRODUCT
  849 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  850   Compile* C = ra_->C;
  851 
  852   int framesize = C->output()->frame_size_in_bytes();
  853   int bangsize = C->output()->bang_size_in_bytes();
  854   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  855   // Remove wordSize for return addr which is already pushed.
  856   framesize -= wordSize;
  857 
  858   if (C->output()->need_stack_bang(bangsize)) {
  859     framesize -= wordSize;
  860     st->print("# stack bang (%d bytes)", bangsize);
  861     st->print("\n\t");
  862     st->print("pushq   rbp\t# Save rbp");
  863     if (PreserveFramePointer) {
  864         st->print("\n\t");
  865         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  866     }
  867     if (framesize) {
  868       st->print("\n\t");
  869       st->print("subq    rsp, #%d\t# Create frame",framesize);
  870     }
  871   } else {
  872     st->print("subq    rsp, #%d\t# Create frame",framesize);
  873     st->print("\n\t");
  874     framesize -= wordSize;
  875     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  876     if (PreserveFramePointer) {
  877       st->print("\n\t");
  878       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  879       if (framesize > 0) {
  880         st->print("\n\t");
  881         st->print("addq    rbp, #%d", framesize);
  882       }
  883     }
  884   }
  885 
  886   if (VerifyStackAtCalls) {
  887     st->print("\n\t");
  888     framesize -= wordSize;
  889     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  890 #ifdef ASSERT
  891     st->print("\n\t");
  892     st->print("# stack alignment check");
  893 #endif
  894   }
  895   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  896     st->print("\n\t");
  897     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  898     st->print("\n\t");
  899     st->print("je      fast_entry\t");
  900     st->print("\n\t");
  901     st->print("call    #nmethod_entry_barrier_stub\t");
  902     st->print("\n\tfast_entry:");
  903   }
  904   st->cr();
  905 }
  906 #endif
  907 
  908 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  909   Compile* C = ra_->C;
  910   C2_MacroAssembler _masm(&cbuf);
  911 
  912   __ verified_entry(C);
  913 
  914   if (ra_->C->stub_function() == NULL) {
  915     __ entry_barrier();
  916   }
  917 
  918   if (!Compile::current()->output()->in_scratch_emit_size()) {
  919     __ bind(*_verified_entry);
  920   }
  921 
  922   C->output()->set_frame_complete(cbuf.insts_size());
  923 
  924   if (C->has_mach_constant_base_node()) {
  925     // NOTE: We set the table base offset here because users might be
  926     // emitted before MachConstantBaseNode.
  927     ConstantTable& constant_table = C->output()->constant_table();
  928     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  929   }
  930 }
  931 
  932 int MachPrologNode::reloc() const
  933 {
  934   return 0; // a large enough number
  935 }
  936 
  937 //=============================================================================
  938 #ifndef PRODUCT
  939 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  940 {
  941   Compile* C = ra_->C;
  942   if (generate_vzeroupper(C)) {
  943     st->print("vzeroupper");
  944     st->cr(); st->print("\t");
  945   }
  946 
  947   int framesize = C->output()->frame_size_in_bytes();
  948   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  949   // Remove word for return adr already pushed
  950   // and RBP
  951   framesize -= 2*wordSize;
  952 
  953   if (framesize) {
  954     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
  955     st->print("\t");
  956   }
  957 
  958   st->print_cr("popq    rbp");
  959   if (do_polling() && C->is_method_compilation()) {
  960     st->print("\t");
  961     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  962                  "ja      #safepoint_stub\t"
  963                  "# Safepoint: poll for GC");
  964   }
  965 }
  966 #endif
  967 
  968 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  969 {
  970   Compile* C = ra_->C;
  971   MacroAssembler _masm(&cbuf);
  972 
  973   if (generate_vzeroupper(C)) {
  974     // Clear upper bits of YMM registers when current compiled code uses
  975     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  976     __ vzeroupper();
  977   }
  978 
  979   // Subtract two words to account for return address and rbp
  980   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  981   __ remove_frame(initial_framesize, C->needs_stack_repair());
  982 
  983   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  984     __ reserved_stack_check();
  985   }
  986 
  987   if (do_polling() && C->is_method_compilation()) {
  988     MacroAssembler _masm(&cbuf);
  989     Label dummy_label;
  990     Label* code_stub = &dummy_label;
  991     if (!C->output()->in_scratch_emit_size()) {
  992       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  993       C->output()->add_stub(stub);
  994       code_stub = &stub->entry();
  995     }
  996     __ relocate(relocInfo::poll_return_type);
  997     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  998   }
  999 }
 1000 
 1001 int MachEpilogNode::reloc() const
 1002 {
 1003   return 2; // a large enough number
 1004 }
 1005 
 1006 const Pipeline* MachEpilogNode::pipeline() const
 1007 {
 1008   return MachNode::pipeline_class();
 1009 }
 1010 
 1011 //=============================================================================
 1012 
 1013 enum RC {
 1014   rc_bad,
 1015   rc_int,
 1016   rc_kreg,
 1017   rc_float,
 1018   rc_stack
 1019 };
 1020 
 1021 static enum RC rc_class(OptoReg::Name reg)
 1022 {
 1023   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 1024 
 1025   if (OptoReg::is_stack(reg)) return rc_stack;
 1026 
 1027   VMReg r = OptoReg::as_VMReg(reg);
 1028 
 1029   if (r->is_Register()) return rc_int;
 1030 
 1031   if (r->is_KRegister()) return rc_kreg;
 1032 
 1033   assert(r->is_XMMRegister(), "must be");
 1034   return rc_float;
 1035 }
 1036 
// Forward declarations of two vector helpers used by the spill-copy code
// below. Both are shared by the 32- and 64-bit VM and are defined in x86.ad.
// Like the other helpers here, they take a code buffer 'cbuf' and an output
// stream 'st'.

// Vector register-to-register move for ideal register type 'ireg'.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector move between register 'reg' and the stack slot at 'stack_offset';
// 'is_load' selects the stack -> register direction.
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
 1043 
 1044 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
 1045                                       int dst_offset, uint ireg, outputStream* st) {
 1046   if (cbuf) {
 1047     MacroAssembler _masm(cbuf);
 1048     switch (ireg) {
 1049     case Op_VecS:
 1050       __ movq(Address(rsp, -8), rax);
 1051       __ movl(rax, Address(rsp, src_offset));
 1052       __ movl(Address(rsp, dst_offset), rax);
 1053       __ movq(rax, Address(rsp, -8));
 1054       break;
 1055     case Op_VecD:
 1056       __ pushq(Address(rsp, src_offset));
 1057       __ popq (Address(rsp, dst_offset));
 1058       break;
 1059     case Op_VecX:
 1060       __ pushq(Address(rsp, src_offset));
 1061       __ popq (Address(rsp, dst_offset));
 1062       __ pushq(Address(rsp, src_offset+8));
 1063       __ popq (Address(rsp, dst_offset+8));
 1064       break;
 1065     case Op_VecY:
 1066       __ vmovdqu(Address(rsp, -32), xmm0);
 1067       __ vmovdqu(xmm0, Address(rsp, src_offset));
 1068       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 1069       __ vmovdqu(xmm0, Address(rsp, -32));
 1070       break;
 1071     case Op_VecZ:
 1072       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 1073       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 1074       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 1075       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 1076       break;
 1077     default:
 1078       ShouldNotReachHere();
 1079     }
 1080 #ifndef PRODUCT
 1081   } else {
 1082     switch (ireg) {
 1083     case Op_VecS:
 1084       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 1085                 "movl    rax, [rsp + #%d]\n\t"
 1086                 "movl    [rsp + #%d], rax\n\t"
 1087                 "movq    rax, [rsp - #8]",
 1088                 src_offset, dst_offset);
 1089       break;
 1090     case Op_VecD:
 1091       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1092                 "popq    [rsp + #%d]",
 1093                 src_offset, dst_offset);
 1094       break;
 1095      case Op_VecX:
 1096       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 1097                 "popq    [rsp + #%d]\n\t"
 1098                 "pushq   [rsp + #%d]\n\t"
 1099                 "popq    [rsp + #%d]",
 1100                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 1101       break;
 1102     case Op_VecY:
 1103       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1104                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1105                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1106                 "vmovdqu xmm0, [rsp - #32]",
 1107                 src_offset, dst_offset);
 1108       break;
 1109     case Op_VecZ:
 1110       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1111                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1112                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1113                 "vmovdqu xmm0, [rsp - #64]",
 1114                 src_offset, dst_offset);
 1115       break;
 1116     default:
 1117       ShouldNotReachHere();
 1118     }
 1119 #endif
 1120   }
 1121 }
 1122 
// Emits (cbuf != NULL) or pretty-prints (cbuf == NULL, non-PRODUCT builds
// only) the code for a spill copy between any combination of stack slot,
// general purpose register, XMM register and opmask (k) register.
//
//   cbuf    - code buffer to emit into, or NULL to print to 'st' instead
//   ra_     - register allocator; supplies the source location (from in(1))
//             and the destination location (from this node)
//   do_size - not used by this implementation
//   st      - stream for the textual form; at least one of cbuf/st is non-NULL
//
// A value is moved as 64 bits when, for both source and destination, the
// first register name is even and the second one directly follows it;
// otherwise it is moved as 32 bits. Vector values (other than vector masks)
// are delegated to the vec_* helpers.
//
// Every path that returns yields 0; unsupported combinations end in
// Unimplemented() / a failing assert.
uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(cbuf != NULL || st  != NULL, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  // Classify each half as stack / gpr / xmm / kreg (or bad when invalid).
  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector values (but not vector masks) are handled by the vector helpers.
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      // xmm -> xmm
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      // xmm -> mem (store)
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      // mem -> xmm (load)
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq    [rsp + #%d]",
                     src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // bounce the value through rax, saving and restoring rax via the
        // word just below rsp.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl    rax, [rsp + #%d]\n\t"
                    "movl    [rsp + #%d], rax\n\t"
                    "movq    rax, [rsp - #8]",
                     src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, [rsp + #%d]\t# spill",
                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // mem -> kreg
      // Only the 64-bit form is handled; for any other pairing nothing is
      // emitted or printed.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
  #ifndef PRODUCT
        } else {
           st->print("kmovq   %s, %s\t# spill",
                       Matcher::regName[dst_first],
                       Matcher::regName[src_first]);
  #endif
        }
      }
      // NOTE(review): Unimplemented() is reached unconditionally, even after
      // the kmov above was emitted/printed - presumably gpr -> kreg spill
      // copies are not expected to occur; confirm before relying on this path.
      Unimplemented();
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // xmm -> kreg is never a legal spill copy.
      assert(false, "Illegal spilling");
      return 0;
    }
  } else if (src_first_rc == rc_kreg) {
    // kreg ->
    if (dst_first_rc == rc_stack) {
      // kreg -> mem
      // Only the 64-bit form is handled; for any other pairing nothing is
      // emitted or printed.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   [rsp + #%d] , %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // kreg -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): Unimplemented() is reached unconditionally, even after
      // the kmov above was emitted/printed - presumably kreg -> gpr spill
      // copies are not expected to occur; confirm before relying on this path.
      Unimplemented();
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // kreg -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // kreg -> xmm is never a legal spill copy.
      assert(false, "Illegal spill");
      return 0;
    }
  }

  // No source/destination combination above matched.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
 1582 
 1583 #ifndef PRODUCT
 1584 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1585   implementation(NULL, ra_, false, st);
 1586 }
 1587 #endif
 1588 
 1589 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1590   implementation(&cbuf, ra_, false, NULL);
 1591 }
 1592 
 1593 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1594   return MachNode::size(ra_);
 1595 }
 1596 
 1597 //=============================================================================
 1598 #ifndef PRODUCT
 1599 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1600 {
 1601   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1602   int reg = ra_->get_reg_first(this);
 1603   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1604             Matcher::regName[reg], offset);
 1605 }
 1606 #endif
 1607 
 1608 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1609 {
 1610   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1611   int reg = ra_->get_encode(this);
 1612   if (offset >= 0x80) {
 1613     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1614     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1615     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1616     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1617     emit_d32(cbuf, offset);
 1618   } else {
 1619     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1620     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1621     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1622     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1623     emit_d8(cbuf, offset);
 1624   }
 1625 }
 1626 
 1627 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1628 {
 1629   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1630   return (offset < 0x80) ? 5 : 8; // REX
 1631 }
 1632 
 1633 //=============================================================================
 1634 #ifndef PRODUCT
 1635 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1636 {
 1637   st->print_cr("MachVEPNode");
 1638 }
 1639 #endif
 1640 
 1641 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1642 {
 1643   C2_MacroAssembler _masm(&cbuf);
 1644   uint insts_size = cbuf.insts_size();
 1645   if (!_verified) {
 1646     if (UseCompressedClassPointers) {
 1647       __ load_klass(rscratch1, j_rarg0, rscratch2);
 1648       __ cmpptr(rax, rscratch1);
 1649     } else {
 1650       __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1651     }
 1652     __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1653   } else {
 1654     // TODO 8284443 Avoid creation of temporary frame
 1655     if (ra_->C->stub_function() == NULL) {
 1656       __ verified_entry(ra_->C, 0);
 1657       __ entry_barrier();
 1658       int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
 1659       __ remove_frame(initial_framesize, false);
 1660     }
 1661     // Unpack inline type args passed as oop and then jump to
 1662     // the verified entry point (skipping the unverified entry).
 1663     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1664     // Emit code for verified entry and save increment for stack repair on return
 1665     __ verified_entry(ra_->C, sp_inc);
 1666     if (Compile::current()->output()->in_scratch_emit_size()) {
 1667       Label dummy_verified_entry;
 1668       __ jmp(dummy_verified_entry);
 1669     } else {
 1670       __ jmp(*_verified_entry);
 1671     }
 1672   }
 1673   /* WARNING these NOPs are critical so that verified entry point is properly
 1674      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1675   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1676   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1677   if (nops_cnt > 0) {
 1678     __ nop(nops_cnt);
 1679   }
 1680 }
 1681 
 1682 //=============================================================================
 1683 #ifndef PRODUCT
 1684 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1685 {
 1686   if (UseCompressedClassPointers) {
 1687     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1688     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1689     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1690   } else {
 1691     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1692                  "# Inline cache check");
 1693   }
 1694   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1695   st->print_cr("\tnop\t# nops to align entry point");
 1696 }
 1697 #endif
 1698 
 1699 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1700 {
 1701   MacroAssembler masm(&cbuf);
 1702   uint insts_size = cbuf.insts_size();
 1703   if (UseCompressedClassPointers) {
 1704     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1705     masm.cmpptr(rax, rscratch1);
 1706   } else {
 1707     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1708   }
 1709 
 1710   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1711 
 1712   /* WARNING these NOPs are critical so that verified entry point is properly
 1713      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1714   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1715   if (OptoBreakpoint) {
 1716     // Leave space for int3
 1717     nops_cnt -= 1;
 1718   }
 1719   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1720   if (nops_cnt > 0)
 1721     masm.nop(nops_cnt);
 1722 }
 1723 
 1724 //=============================================================================
 1725 
 1726 const bool Matcher::supports_vector_calling_convention(void) {
 1727   if (EnableVectorSupport && UseVectorStubs) {
 1728     return true;
 1729   }
 1730   return false;
 1731 }
 1732 
 1733 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1734   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1735   int lo = XMM0_num;
 1736   int hi = XMM0b_num;
 1737   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1738   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1739   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1740   return OptoRegPair(hi, lo);
 1741 }
 1742 
 1743 // Is this branch offset short enough that a short branch can be used?
 1744 //
 1745 // NOTE: If the platform does not provide any short branch variants, then
 1746 //       this method should return false for offset 0.
 1747 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1748   // The passed offset is relative to address of the branch.
 1749   // On 86 a branch displacement is calculated relative to address
 1750   // of a next instruction.
 1751   offset -= br_size;
 1752 
 1753   // the short version of jmpConUCF2 contains multiple branches,
 1754   // making the reach slightly less
 1755   if (rule == jmpConUCF2_rule)
 1756     return (-126 <= offset && offset <= 125);
 1757   return (-128 <= offset && offset <= 127);
 1758 }
 1759 
 1760 // Return whether or not this register is ever used as an argument.
 1761 // This function is used on startup to build the trampoline stubs in
 1762 // generateOptoStub.  Registers not mentioned will be killed by the VM
 1763 // call in the trampoline, and arguments in those registers not be
 1764 // available to the callee.
 1765 bool Matcher::can_be_java_arg(int reg)
 1766 {
 1767   return
 1768     reg ==  RDI_num || reg == RDI_H_num ||
 1769     reg ==  RSI_num || reg == RSI_H_num ||
 1770     reg ==  RDX_num || reg == RDX_H_num ||
 1771     reg ==  RCX_num || reg == RCX_H_num ||
 1772     reg ==   R8_num || reg ==  R8_H_num ||
 1773     reg ==   R9_num || reg ==  R9_H_num ||
 1774     reg ==  R12_num || reg == R12_H_num ||
 1775     reg == XMM0_num || reg == XMM0b_num ||
 1776     reg == XMM1_num || reg == XMM1b_num ||
 1777     reg == XMM2_num || reg == XMM2b_num ||
 1778     reg == XMM3_num || reg == XMM3b_num ||
 1779     reg == XMM4_num || reg == XMM4b_num ||
 1780     reg == XMM5_num || reg == XMM5b_num ||
 1781     reg == XMM6_num || reg == XMM6b_num ||
 1782     reg == XMM7_num || reg == XMM7b_num;
 1783 }
 1784 
 1785 bool Matcher::is_spillable_arg(int reg)
 1786 {
 1787   return can_be_java_arg(reg);
 1788 }
 1789 
 1790 uint Matcher::int_pressure_limit()
 1791 {
 1792   return (INTPRESSURE == -1) ? _INT_REG_mask.Size() : INTPRESSURE;
 1793 }
 1794 
 1795 uint Matcher::float_pressure_limit()
 1796 {
 1797   // After experiment around with different values, the following default threshold
 1798   // works best for LCM's register pressure scheduling on x64.
 1799   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 1800   uint default_float_pressure_threshold = _FLOAT_REG_mask.Size() - dec_count;
 1801   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 1802 }
 1803 
 1804 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1805   // In 64 bit mode a code which use multiply when
 1806   // devisor is constant is faster than hardware
 1807   // DIV instruction (it uses MulHiL).
 1808   return false;
 1809 }
 1810 
 1811 // Register for DIVI projection of divmodI
 1812 RegMask Matcher::divI_proj_mask() {
 1813   return INT_RAX_REG_mask();
 1814 }
 1815 
 1816 // Register for MODI projection of divmodI
 1817 RegMask Matcher::modI_proj_mask() {
 1818   return INT_RDX_REG_mask();
 1819 }
 1820 
 1821 // Register for DIVL projection of divmodL
 1822 RegMask Matcher::divL_proj_mask() {
 1823   return LONG_RAX_REG_mask();
 1824 }
 1825 
 1826 // Register for MODL projection of divmodL
 1827 RegMask Matcher::modL_proj_mask() {
 1828   return LONG_RDX_REG_mask();
 1829 }
 1830 
 1831 // Register for saving SP into on method handle invokes. Not used on x86_64.
 1832 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1833     return NO_REG_mask();
 1834 }
 1835 
 1836 %}
 1837 
 1838 //----------ENCODING BLOCK-----------------------------------------------------
 1839 // This block specifies the encoding classes used by the compiler to
 1840 // output byte streams.  Encoding classes are parameterized macros
 1841 // used by Machine Instruction Nodes in order to generate the bit
 1842 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 1846 // which returns its register number when queried.  CONST_INTER causes
 1847 // an operand to generate a function which returns the value of the
 1848 // constant when queried.  MEMORY_INTER causes an operand to generate
 1849 // four functions which return the Base Register, the Index Register,
 1850 // the Scale Value, and the Offset Value of the operand when queried.
 1851 // COND_INTER causes an operand to generate six functions which return
 1852 // the encoding code (ie - encoding bits for the instruction)
 1853 // associated with each basic boolean condition for a conditional
 1854 // instruction.
 1855 //
 1856 // Instructions specify two basic values for encoding.  Again, a
 1857 // function is available to check if the constant displacement is an
 1858 // oop. They use the ins_encode keyword to specify their encoding
 1859 // classes (which must be a sequence of enc_class names, and their
 1860 // parameters, specified in the encoding block), and they use the
 1861 // opcode keyword to specify, in order, their primary, secondary, and
 1862 // tertiary opcode.  Only the opcode sections which a particular
 1863 // instruction needs for encoding need to be specified.
 1864 encode %{
 1865   // Build emit functions for each basic byte or larger field in the
 1866   // intel encoding scheme (opcode, rm, sib, immediate), and call them
 1867   // from C++ code in the enc_class source block.  Emit functions will
 1868   // live in the main source block for now.  In future, we can
 1869   // generalize this by adding a syntax that specifies the sizes of
 1870   // fields in an order, so that the adlc can build the emit functions
 1871   // automagically
 1872 
 1873   // Emit primary opcode
 1874   enc_class OpcP
 1875   %{
 1876     emit_opcode(cbuf, $primary);
 1877   %}
 1878 
 1879   // Emit secondary opcode
 1880   enc_class OpcS
 1881   %{
 1882     emit_opcode(cbuf, $secondary);
 1883   %}
 1884 
 1885   // Emit tertiary opcode
 1886   enc_class OpcT
 1887   %{
 1888     emit_opcode(cbuf, $tertiary);
 1889   %}
 1890 
 1891   // Emit opcode directly
 1892   enc_class Opcode(immI d8)
 1893   %{
 1894     emit_opcode(cbuf, $d8$$constant);
 1895   %}
 1896 
 1897   // Emit size prefix
 1898   enc_class SizePrefix
 1899   %{
 1900     emit_opcode(cbuf, 0x66);
 1901   %}
 1902 
 1903   enc_class reg(rRegI reg)
 1904   %{
 1905     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
 1906   %}
 1907 
 1908   enc_class reg_reg(rRegI dst, rRegI src)
 1909   %{
 1910     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
 1911   %}
 1912 
 1913   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
 1914   %{
 1915     emit_opcode(cbuf, $opcode$$constant);
 1916     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
 1917   %}
 1918 
 1919   enc_class cdql_enc(no_rax_rdx_RegI div)
 1920   %{
 1921     // Full implementation of Java idiv and irem; checks for
 1922     // special case as described in JVM spec., p.243 & p.271.
 1923     //
 1924     //         normal case                           special case
 1925     //
 1926     // input : rax: dividend                         min_int
 1927     //         reg: divisor                          -1
 1928     //
 1929     // output: rax: quotient  (= rax idiv reg)       min_int
 1930     //         rdx: remainder (= rax irem reg)       0
 1931     //
 1932     //  Code sequnce:
 1933     //
 1934     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 1935     //    5:   75 07/08                jne    e <normal>
 1936     //    7:   33 d2                   xor    %edx,%edx
 1937     //  [div >= 8 -> offset + 1]
 1938     //  [REX_B]
 1939     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 1940     //    c:   74 03/04                je     11 <done>
 1941     // 000000000000000e <normal>:
 1942     //    e:   99                      cltd
 1943     //  [div >= 8 -> offset + 1]
 1944     //  [REX_B]
 1945     //    f:   f7 f9                   idiv   $div
 1946     // 0000000000000011 <done>:
 1947     MacroAssembler _masm(&cbuf);
 1948     Label normal;
 1949     Label done;
 1950 
 1951     // cmp    $0x80000000,%eax
 1952     __ cmpl(as_Register(RAX_enc), 0x80000000);
 1953 
 1954     // jne    e <normal>
 1955     __ jccb(Assembler::notEqual, normal);
 1956 
 1957     // xor    %edx,%edx
 1958     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 1959 
 1960     // cmp    $0xffffffffffffffff,%ecx
 1961     __ cmpl($div$$Register, -1);
 1962 
 1963     // je     11 <done>
 1964     __ jccb(Assembler::equal, done);
 1965 
 1966     // <normal>
 1967     // cltd
 1968     __ bind(normal);
 1969     __ cdql();
 1970 
 1971     // idivl
 1972     // <done>
 1973     __ idivl($div$$Register);
 1974     __ bind(done);
 1975   %}
 1976 
 1977   enc_class cdqq_enc(no_rax_rdx_RegL div)
 1978   %{
 1979     // Full implementation of Java ldiv and lrem; checks for
 1980     // special case as described in JVM spec., p.243 & p.271.
 1981     //
 1982     //         normal case                           special case
 1983     //
 1984     // input : rax: dividend                         min_long
 1985     //         reg: divisor                          -1
 1986     //
 1987     // output: rax: quotient  (= rax idiv reg)       min_long
 1988     //         rdx: remainder (= rax irem reg)       0
 1989     //
 1990     //  Code sequnce:
 1991     //
 1992     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 1993     //    7:   00 00 80
 1994     //    a:   48 39 d0                cmp    %rdx,%rax
 1995     //    d:   75 08                   jne    17 <normal>
 1996     //    f:   33 d2                   xor    %edx,%edx
 1997     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 1998     //   15:   74 05                   je     1c <done>
 1999     // 0000000000000017 <normal>:
 2000     //   17:   48 99                   cqto
 2001     //   19:   48 f7 f9                idiv   $div
 2002     // 000000000000001c <done>:
 2003     MacroAssembler _masm(&cbuf);
 2004     Label normal;
 2005     Label done;
 2006 
 2007     // mov    $0x8000000000000000,%rdx
 2008     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 2009 
 2010     // cmp    %rdx,%rax
 2011     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 2012 
 2013     // jne    17 <normal>
 2014     __ jccb(Assembler::notEqual, normal);
 2015 
 2016     // xor    %edx,%edx
 2017     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 2018 
 2019     // cmp    $0xffffffffffffffff,$div
 2020     __ cmpq($div$$Register, -1);
 2021 
 2022     // je     1e <done>
 2023     __ jccb(Assembler::equal, done);
 2024 
 2025     // <normal>
 2026     // cqto
 2027     __ bind(normal);
 2028     __ cdqq();
 2029 
 2030     // idivq (note: must be emitted by the user of this rule)
 2031     // <done>
 2032     __ idivq($div$$Register);
 2033     __ bind(done);
 2034   %}
 2035 
 2036   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
 2037   enc_class OpcSE(immI imm)
 2038   %{
 2039     // Emit primary opcode and set sign-extend bit
 2040     // Check for 8-bit immediate, and set sign extend bit in opcode
 2041     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2042       emit_opcode(cbuf, $primary | 0x02);
 2043     } else {
 2044       // 32-bit immediate
 2045       emit_opcode(cbuf, $primary);
 2046     }
 2047   %}
 2048 
 2049   enc_class OpcSErm(rRegI dst, immI imm)
 2050   %{
 2051     // OpcSEr/m
 2052     int dstenc = $dst$$reg;
 2053     if (dstenc >= 8) {
 2054       emit_opcode(cbuf, Assembler::REX_B);
 2055       dstenc -= 8;
 2056     }
 2057     // Emit primary opcode and set sign-extend bit
 2058     // Check for 8-bit immediate, and set sign extend bit in opcode
 2059     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2060       emit_opcode(cbuf, $primary | 0x02);
 2061     } else {
 2062       // 32-bit immediate
 2063       emit_opcode(cbuf, $primary);
 2064     }
 2065     // Emit r/m byte with secondary opcode, after primary opcode.
 2066     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2067   %}
 2068 
 2069   enc_class OpcSErm_wide(rRegL dst, immI imm)
 2070   %{
 2071     // OpcSEr/m
 2072     int dstenc = $dst$$reg;
 2073     if (dstenc < 8) {
 2074       emit_opcode(cbuf, Assembler::REX_W);
 2075     } else {
 2076       emit_opcode(cbuf, Assembler::REX_WB);
 2077       dstenc -= 8;
 2078     }
 2079     // Emit primary opcode and set sign-extend bit
 2080     // Check for 8-bit immediate, and set sign extend bit in opcode
 2081     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2082       emit_opcode(cbuf, $primary | 0x02);
 2083     } else {
 2084       // 32-bit immediate
 2085       emit_opcode(cbuf, $primary);
 2086     }
 2087     // Emit r/m byte with secondary opcode, after primary opcode.
 2088     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2089   %}
 2090 
 2091   enc_class Con8or32(immI imm)
 2092   %{
 2093     // Check for 8-bit immediate, and set sign extend bit in opcode
 2094     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2095       $$$emit8$imm$$constant;
 2096     } else {
 2097       // 32-bit immediate
 2098       $$$emit32$imm$$constant;
 2099     }
 2100   %}
 2101 
 2102   enc_class opc2_reg(rRegI dst)
 2103   %{
 2104     // BSWAP
 2105     emit_cc(cbuf, $secondary, $dst$$reg);
 2106   %}
 2107 
 2108   enc_class opc3_reg(rRegI dst)
 2109   %{
 2110     // BSWAP
 2111     emit_cc(cbuf, $tertiary, $dst$$reg);
 2112   %}
 2113 
 2114   enc_class reg_opc(rRegI div)
 2115   %{
 2116     // INC, DEC, IDIV, IMOD, JMP indirect, ...
 2117     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
 2118   %}
 2119 
 2120   enc_class enc_cmov(cmpOp cop)
 2121   %{
 2122     // CMOV
 2123     $$$emit8$primary;
 2124     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 2125   %}
 2126 
 2127   enc_class enc_PartialSubtypeCheck()
 2128   %{
 2129     Register Rrdi = as_Register(RDI_enc); // result register
 2130     Register Rrax = as_Register(RAX_enc); // super class
 2131     Register Rrcx = as_Register(RCX_enc); // killed
 2132     Register Rrsi = as_Register(RSI_enc); // sub class
 2133     Label miss;
 2134     const bool set_cond_codes = true;
 2135 
 2136     MacroAssembler _masm(&cbuf);
 2137     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 2138                                      NULL, &miss,
 2139                                      /*set_cond_codes:*/ true);
 2140     if ($primary) {
 2141       __ xorptr(Rrdi, Rrdi);
 2142     }
 2143     __ bind(miss);
 2144   %}
 2145 
 2146   enc_class clear_avx %{
 2147     debug_only(int off0 = cbuf.insts_size());
 2148     if (generate_vzeroupper(Compile::current())) {
 2149       // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
 2150       // Clear upper bits of YMM registers when current compiled code uses
 2151       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 2152       MacroAssembler _masm(&cbuf);
 2153       __ vzeroupper();
 2154     }
 2155     debug_only(int off1 = cbuf.insts_size());
 2156     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 2157   %}
 2158 
 2159   enc_class Java_To_Runtime(method meth) %{
 2160     // No relocation needed
 2161     MacroAssembler _masm(&cbuf);
 2162     __ mov64(r10, (int64_t) $meth$$method);
 2163     __ call(r10);
 2164     __ post_call_nop();
 2165   %}
 2166 
 2167   enc_class Java_Static_Call(method meth)
 2168   %{
 2169     // JAVA STATIC CALL
 2170     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 2171     // determine who we intended to call.
 2172     MacroAssembler _masm(&cbuf);
 2173     cbuf.set_insts_mark();
 2174 
 2175     if (!_method) {
 2176       $$$emit8$primary;
 2177       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
 2178                      runtime_call_Relocation::spec(),
 2179                      RELOC_DISP32);
 2180     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 2181       // The NOP here is purely to ensure that eliding a call to
 2182       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 2183       __ addr_nop_5();
 2184       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 2185     } else {
 2186       $$$emit8$primary;
 2187       int method_index = resolved_method_index(cbuf);
 2188       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 2189                                                   : static_call_Relocation::spec(method_index);
 2190       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
 2191                      rspec, RELOC_DISP32);
 2192       address mark = cbuf.insts_mark();
 2193       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 2194         // Calls of the same statically bound method can share
 2195         // a stub to the interpreter.
 2196         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 2197       } else {
 2198         // Emit stubs for static call.
 2199         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 2200         if (stub == NULL) {
 2201           ciEnv::current()->record_failure("CodeCache is full");
 2202           return;
 2203         }
 2204       }
 2205     }
 2206     _masm.clear_inst_mark();
 2207     __ post_call_nop();
 2208   %}
 2209 
 2210   enc_class Java_Dynamic_Call(method meth) %{
 2211     MacroAssembler _masm(&cbuf);
 2212     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 2213     __ post_call_nop();
 2214   %}
 2215 
 2216   enc_class reg_opc_imm(rRegI dst, immI8 shift)
 2217   %{
 2218     // SAL, SAR, SHR
 2219     int dstenc = $dst$$reg;
 2220     if (dstenc >= 8) {
 2221       emit_opcode(cbuf, Assembler::REX_B);
 2222       dstenc -= 8;
 2223     }
 2224     $$$emit8$primary;
 2225     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2226     $$$emit8$shift$$constant;
 2227   %}
 2228 
 2229   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
 2230   %{
 2231     // SAL, SAR, SHR
 2232     int dstenc = $dst$$reg;
 2233     if (dstenc < 8) {
 2234       emit_opcode(cbuf, Assembler::REX_W);
 2235     } else {
 2236       emit_opcode(cbuf, Assembler::REX_WB);
 2237       dstenc -= 8;
 2238     }
 2239     $$$emit8$primary;
 2240     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2241     $$$emit8$shift$$constant;
 2242   %}
 2243 
 2244   enc_class load_immI(rRegI dst, immI src)
 2245   %{
 2246     int dstenc = $dst$$reg;
 2247     if (dstenc >= 8) {
 2248       emit_opcode(cbuf, Assembler::REX_B);
 2249       dstenc -= 8;
 2250     }
 2251     emit_opcode(cbuf, 0xB8 | dstenc);
 2252     $$$emit32$src$$constant;
 2253   %}
 2254 
 2255   enc_class load_immL(rRegL dst, immL src)
 2256   %{
 2257     int dstenc = $dst$$reg;
 2258     if (dstenc < 8) {
 2259       emit_opcode(cbuf, Assembler::REX_W);
 2260     } else {
 2261       emit_opcode(cbuf, Assembler::REX_WB);
 2262       dstenc -= 8;
 2263     }
 2264     emit_opcode(cbuf, 0xB8 | dstenc);
 2265     emit_d64(cbuf, $src$$constant);
 2266   %}
 2267 
 2268   enc_class load_immUL32(rRegL dst, immUL32 src)
 2269   %{
 2270     // same as load_immI, but this time we care about zeroes in the high word
 2271     int dstenc = $dst$$reg;
 2272     if (dstenc >= 8) {
 2273       emit_opcode(cbuf, Assembler::REX_B);
 2274       dstenc -= 8;
 2275     }
 2276     emit_opcode(cbuf, 0xB8 | dstenc);
 2277     $$$emit32$src$$constant;
 2278   %}
 2279 
 2280   enc_class load_immL32(rRegL dst, immL32 src)
 2281   %{
 2282     int dstenc = $dst$$reg;
 2283     if (dstenc < 8) {
 2284       emit_opcode(cbuf, Assembler::REX_W);
 2285     } else {
 2286       emit_opcode(cbuf, Assembler::REX_WB);
 2287       dstenc -= 8;
 2288     }
 2289     emit_opcode(cbuf, 0xC7);
 2290     emit_rm(cbuf, 0x03, 0x00, dstenc);
 2291     $$$emit32$src$$constant;
 2292   %}
 2293 
 2294   enc_class load_immP31(rRegP dst, immP32 src)
 2295   %{
 2296     // same as load_immI, but this time we care about zeroes in the high word
 2297     int dstenc = $dst$$reg;
 2298     if (dstenc >= 8) {
 2299       emit_opcode(cbuf, Assembler::REX_B);
 2300       dstenc -= 8;
 2301     }
 2302     emit_opcode(cbuf, 0xB8 | dstenc);
 2303     $$$emit32$src$$constant;
 2304   %}
 2305 
 2306   enc_class load_immP(rRegP dst, immP src)
 2307   %{
 2308     int dstenc = $dst$$reg;
 2309     if (dstenc < 8) {
 2310       emit_opcode(cbuf, Assembler::REX_W);
 2311     } else {
 2312       emit_opcode(cbuf, Assembler::REX_WB);
 2313       dstenc -= 8;
 2314     }
 2315     emit_opcode(cbuf, 0xB8 | dstenc);
 2316     // This next line should be generated from ADLC
 2317     if ($src->constant_reloc() != relocInfo::none) {
 2318       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
 2319     } else {
 2320       emit_d64(cbuf, $src$$constant);
 2321     }
 2322   %}
 2323 
 2324   enc_class Con32(immI src)
 2325   %{
 2326     // Output immediate
 2327     $$$emit32$src$$constant;
 2328   %}
 2329 
 2330   enc_class Con32F_as_bits(immF src)
 2331   %{
 2332     // Output Float immediate bits
 2333     jfloat jf = $src$$constant;
 2334     jint jf_as_bits = jint_cast(jf);
 2335     emit_d32(cbuf, jf_as_bits);
 2336   %}
 2337 
 2338   enc_class Con16(immI src)
 2339   %{
 2340     // Output immediate
 2341     $$$emit16$src$$constant;
 2342   %}
 2343 
 2344   // How is this different from Con32??? XXX
 2345   enc_class Con_d32(immI src)
 2346   %{
 2347     emit_d32(cbuf,$src$$constant);
 2348   %}
 2349 
 2350   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
 2351     // Output immediate memory reference
 2352     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2353     emit_d32(cbuf, 0x00);
 2354   %}
 2355 
 2356   enc_class lock_prefix()
 2357   %{
 2358     emit_opcode(cbuf, 0xF0); // lock
 2359   %}
 2360 
 2361   enc_class REX_mem(memory mem)
 2362   %{
 2363     if ($mem$$base >= 8) {
 2364       if ($mem$$index < 8) {
 2365         emit_opcode(cbuf, Assembler::REX_B);
 2366       } else {
 2367         emit_opcode(cbuf, Assembler::REX_XB);
 2368       }
 2369     } else {
 2370       if ($mem$$index >= 8) {
 2371         emit_opcode(cbuf, Assembler::REX_X);
 2372       }
 2373     }
 2374   %}
 2375 
 2376   enc_class REX_mem_wide(memory mem)
 2377   %{
 2378     if ($mem$$base >= 8) {
 2379       if ($mem$$index < 8) {
 2380         emit_opcode(cbuf, Assembler::REX_WB);
 2381       } else {
 2382         emit_opcode(cbuf, Assembler::REX_WXB);
 2383       }
 2384     } else {
 2385       if ($mem$$index < 8) {
 2386         emit_opcode(cbuf, Assembler::REX_W);
 2387       } else {
 2388         emit_opcode(cbuf, Assembler::REX_WX);
 2389       }
 2390     }
 2391   %}
 2392 
 2393   // for byte regs
 2394   enc_class REX_breg(rRegI reg)
 2395   %{
 2396     if ($reg$$reg >= 4) {
 2397       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2398     }
 2399   %}
 2400 
 2401   // for byte regs
 2402   enc_class REX_reg_breg(rRegI dst, rRegI src)
 2403   %{
 2404     if ($dst$$reg < 8) {
 2405       if ($src$$reg >= 4) {
 2406         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2407       }
 2408     } else {
 2409       if ($src$$reg < 8) {
 2410         emit_opcode(cbuf, Assembler::REX_R);
 2411       } else {
 2412         emit_opcode(cbuf, Assembler::REX_RB);
 2413       }
 2414     }
 2415   %}
 2416 
 2417   // for byte regs
 2418   enc_class REX_breg_mem(rRegI reg, memory mem)
 2419   %{
 2420     if ($reg$$reg < 8) {
 2421       if ($mem$$base < 8) {
 2422         if ($mem$$index >= 8) {
 2423           emit_opcode(cbuf, Assembler::REX_X);
 2424         } else if ($reg$$reg >= 4) {
 2425           emit_opcode(cbuf, Assembler::REX);
 2426         }
 2427       } else {
 2428         if ($mem$$index < 8) {
 2429           emit_opcode(cbuf, Assembler::REX_B);
 2430         } else {
 2431           emit_opcode(cbuf, Assembler::REX_XB);
 2432         }
 2433       }
 2434     } else {
 2435       if ($mem$$base < 8) {
 2436         if ($mem$$index < 8) {
 2437           emit_opcode(cbuf, Assembler::REX_R);
 2438         } else {
 2439           emit_opcode(cbuf, Assembler::REX_RX);
 2440         }
 2441       } else {
 2442         if ($mem$$index < 8) {
 2443           emit_opcode(cbuf, Assembler::REX_RB);
 2444         } else {
 2445           emit_opcode(cbuf, Assembler::REX_RXB);
 2446         }
 2447       }
 2448     }
 2449   %}
 2450 
 2451   enc_class REX_reg(rRegI reg)
 2452   %{
 2453     if ($reg$$reg >= 8) {
 2454       emit_opcode(cbuf, Assembler::REX_B);
 2455     }
 2456   %}
 2457 
 2458   enc_class REX_reg_wide(rRegI reg)
 2459   %{
 2460     if ($reg$$reg < 8) {
 2461       emit_opcode(cbuf, Assembler::REX_W);
 2462     } else {
 2463       emit_opcode(cbuf, Assembler::REX_WB);
 2464     }
 2465   %}
 2466 
 2467   enc_class REX_reg_reg(rRegI dst, rRegI src)
 2468   %{
 2469     if ($dst$$reg < 8) {
 2470       if ($src$$reg >= 8) {
 2471         emit_opcode(cbuf, Assembler::REX_B);
 2472       }
 2473     } else {
 2474       if ($src$$reg < 8) {
 2475         emit_opcode(cbuf, Assembler::REX_R);
 2476       } else {
 2477         emit_opcode(cbuf, Assembler::REX_RB);
 2478       }
 2479     }
 2480   %}
 2481 
 2482   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
 2483   %{
 2484     if ($dst$$reg < 8) {
 2485       if ($src$$reg < 8) {
 2486         emit_opcode(cbuf, Assembler::REX_W);
 2487       } else {
 2488         emit_opcode(cbuf, Assembler::REX_WB);
 2489       }
 2490     } else {
 2491       if ($src$$reg < 8) {
 2492         emit_opcode(cbuf, Assembler::REX_WR);
 2493       } else {
 2494         emit_opcode(cbuf, Assembler::REX_WRB);
 2495       }
 2496     }
 2497   %}
 2498 
 2499   enc_class REX_reg_mem(rRegI reg, memory mem)
 2500   %{
          // reg >= 8 contributes R, mem index >= 8 contributes X and mem
          // base >= 8 contributes B.  Nothing is emitted when all three
          // encodings are below 8.
          const bool reg_high   = ($reg$$reg >= 8);
          const bool base_high  = ($mem$$base >= 8);
          const bool index_high = ($mem$$index >= 8);
          if (reg_high) {
            if (base_high) {
              emit_opcode(cbuf, index_high ? Assembler::REX_RXB : Assembler::REX_RB);
            } else {
              emit_opcode(cbuf, index_high ? Assembler::REX_RX : Assembler::REX_R);
            }
          } else if (base_high) {
            emit_opcode(cbuf, index_high ? Assembler::REX_XB : Assembler::REX_B);
          } else if (index_high) {
            emit_opcode(cbuf, Assembler::REX_X);
          }
 2528   %}
 2529 
 2530   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
 2531   %{
          // A W-family prefix is always emitted; reg >= 8 adds R, mem
          // index >= 8 adds X and mem base >= 8 adds B.
          const bool reg_high   = ($reg$$reg >= 8);
          const bool base_high  = ($mem$$base >= 8);
          const bool index_high = ($mem$$index >= 8);
          if (reg_high) {
            if (base_high) {
              emit_opcode(cbuf, index_high ? Assembler::REX_WRXB : Assembler::REX_WRB);
            } else {
              emit_opcode(cbuf, index_high ? Assembler::REX_WRX : Assembler::REX_WR);
            }
          } else {
            if (base_high) {
              emit_opcode(cbuf, index_high ? Assembler::REX_WXB : Assembler::REX_WB);
            } else {
              emit_opcode(cbuf, index_high ? Assembler::REX_WX : Assembler::REX_W);
            }
          }
 2561   %}
 2562 
 2563   enc_class reg_mem(rRegI ereg, memory mem)
 2564   %{
          // Forwards the register encoding and the memory operand's fields to
          // encode_RegMem; high registers are handled inside encode_RegMem.
          const relocInfo::relocType reloc = $mem->disp_reloc();
          const int disp_val  = $mem$$disp;
          const int scale_val = $mem$$scale;
          const int index_enc = $mem$$index;
          const int base_enc  = $mem$$base;
          const int reg_enc   = $ereg$$reg;

          encode_RegMem(cbuf, reg_enc, base_enc, index_enc, scale_val, disp_val,
                        reloc);
 2574   %}
 2575 
 2576   enc_class RM_opc_mem(immI rm_opcode, memory mem)
 2577   %{
          // The constant rm_opcode is passed to encode_RegMem in the register
          // position; high registers are handled inside encode_RegMem.
          const int opcode_ext = $rm_opcode$$constant;
          const int disp_val   = $mem$$disp;
          const int scale_val  = $mem$$scale;
          const int index_enc  = $mem$$index;
          const int base_enc   = $mem$$base;

          // disp-as-oop when working with static globals
          const relocInfo::relocType reloc = $mem->disp_reloc();
          encode_RegMem(cbuf, opcode_ext, base_enc, index_enc, scale_val, disp_val,
                        reloc);
 2591   %}
 2592 
 2593   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
 2594   %{
          // Address is src0 (base) plus the constant src1 (displacement), with
          // no index, no scale and no relocation.
          const int no_index = 0x04;            // 0x04 indicates no index
          const int no_scale = 0x00;            // 0x00 indicates no scale
          const int dst_enc  = $dst$$reg;
          const int base_enc = $src0$$reg;      // 0xFFFFFFFF indicates no base
          const int disp_val = $src1$$constant; // 0x00 indicates no displacement
          encode_RegMem(cbuf, dst_enc, base_enc, no_index, no_scale, disp_val,
                        relocInfo::none);
 2603   %}
 2604 
 2605   enc_class neg_reg(rRegI dst)
 2606   %{
          // Encodings 8 and above get a REX_B prefix and are reduced by 8
          // before being placed in the r/m field.
          const int enc = $dst$$reg;
          const bool is_high = (enc >= 8);
          if (is_high) {
            emit_opcode(cbuf, Assembler::REX_B);
          }
          // NEG $dst
          emit_opcode(cbuf, 0xF7);
          emit_rm(cbuf, 0x3, 0x03, is_high ? enc - 8 : enc);
 2615   %}
 2616 
 2617   enc_class neg_reg_wide(rRegI dst)
 2618   %{
          // Always prefixed: REX_W for encodings below 8, REX_WB (with the
          // encoding reduced by 8) otherwise.
          const int enc = $dst$$reg;
          const bool is_high = (enc >= 8);
          emit_opcode(cbuf, is_high ? Assembler::REX_WB : Assembler::REX_W);
          // NEG $dst
          emit_opcode(cbuf, 0xF7);
          emit_rm(cbuf, 0x3, 0x03, is_high ? enc - 8 : enc);
 2629   %}
 2630 
 2631   enc_class setLT_reg(rRegI dst)
 2632   %{
          // Prefix selection: REX_B for encodings >= 8 (encoding reduced by 8),
          // plain REX for encodings 4..7, none for 0..3.
          const int enc = $dst$$reg;
          const bool is_high = (enc >= 8);
          if (is_high) {
            emit_opcode(cbuf, Assembler::REX_B);
          } else if (enc >= 4) {
            emit_opcode(cbuf, Assembler::REX);
          }
          // SETLT $dst
          emit_opcode(cbuf, 0x0F);
          emit_opcode(cbuf, 0x9C);
          emit_rm(cbuf, 0x3, 0x0, is_high ? enc - 8 : enc);
 2644   %}
 2645 
 2646   enc_class setNZ_reg(rRegI dst)
 2647   %{
          // Prefix selection: REX_B for encodings >= 8 (encoding reduced by 8),
          // plain REX for encodings 4..7, none for 0..3.
          const int enc = $dst$$reg;
          const bool is_high = (enc >= 8);
          if (is_high) {
            emit_opcode(cbuf, Assembler::REX_B);
          } else if (enc >= 4) {
            emit_opcode(cbuf, Assembler::REX);
          }
          // SETNZ $dst
          emit_opcode(cbuf, 0x0F);
          emit_opcode(cbuf, 0x95);
          emit_rm(cbuf, 0x3, 0x0, is_high ? enc - 8 : enc);
 2659   %}
 2660 
 2661 
 2662   // Compare the longs and set -1, 0, or 1 into dst
        // Emitted sequence: cmpq src1, src2; movl dst, -1; jl,s done;
        // setne dst; movzbl dst, dst.  dst therefore keeps -1 when the jl is
        // taken, otherwise it receives the 0/1 result of setne.
 2663   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
 2664   %{
 2665     int src1enc = $src1$$reg;
 2666     int src2enc = $src2$$reg;
 2667     int dstenc = $dst$$reg;
 2668 
 2669     // cmpq $src1, $src2
          // src1 >= 8 selects the R variants and src2 >= 8 the B variants;
          // only the low three bits of each encoding go into emit_rm.
 2670     if (src1enc < 8) {
 2671       if (src2enc < 8) {
 2672         emit_opcode(cbuf, Assembler::REX_W);
 2673       } else {
 2674         emit_opcode(cbuf, Assembler::REX_WB);
 2675       }
 2676     } else {
 2677       if (src2enc < 8) {
 2678         emit_opcode(cbuf, Assembler::REX_WR);
 2679       } else {
 2680         emit_opcode(cbuf, Assembler::REX_WRB);
 2681       }
 2682     }
 2683     emit_opcode(cbuf, 0x3B);
 2684     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
 2685 
 2686     // movl $dst, -1
 2687     if (dstenc >= 8) {
 2688       emit_opcode(cbuf, Assembler::REX_B);
 2689     }
 2690     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
 2691     emit_d32(cbuf, -1);
 2692 
 2693     // jl,s done
          // The displacement skips the setne and movzbl sequences below:
          // three emit calls each (0x06 total), plus one prefix each when
          // dstenc >= 4 (0x08 total) -- assuming every emit_opcode/emit_rm
          // call writes exactly one byte.
 2694     emit_opcode(cbuf, 0x7C);
 2695     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
 2696 
 2697     // setne $dst
          // Encodings 4..7 get a plain REX prefix, 8 and above REX_B.
          // NOTE(review): the plain REX presumably selects the low-byte form
          // of the register -- confirm against the ISA reference.
 2698     if (dstenc >= 4) {
 2699       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
 2700     }
 2701     emit_opcode(cbuf, 0x0F);
 2702     emit_opcode(cbuf, 0x95);
 2703     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
 2704 
 2705     // movzbl $dst, $dst
          // dst is used in both emit_rm register fields, hence REX_RB (not
          // REX_B) for encodings 8 and above.
 2706     if (dstenc >= 4) {
 2707       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
 2708     }
 2709     emit_opcode(cbuf, 0x0F);
 2710     emit_opcode(cbuf, 0xB6);
 2711     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
 2712   %}
 2713 
 2714   enc_class Push_ResultXD(regD dst) %{
          // Stores via fstp_d to [rsp + 0], loads that slot into the XMM
          // register $dst with movdbl, then adds 8 to rsp.
          // NOTE(review): presumably expects an 8-byte slot reserved earlier
          // (e.g. by Push_SrcXD) -- confirm at the use sites.
 2715     MacroAssembler _masm(&cbuf);
 2716     __ fstp_d(Address(rsp, 0));
 2717     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2718     __ addptr(rsp, 8);
 2719   %}
 2720 
 2721   enc_class Push_SrcXD(regD src) %{
          // Subtracts 8 from rsp, stores the XMM register $src to [rsp + 0]
          // with movdbl, then loads that slot with fld_d.  rsp is left
          // decremented by 8 when this encoding finishes.
 2722     MacroAssembler _masm(&cbuf);
 2723     __ subptr(rsp, 8);
 2724     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2725     __ fld_d(Address(rsp, 0));
 2726   %}
 2727 
 2728 
 2729   enc_class enc_rethrow()
 2730   %{
          // Emits opcode 0xE9 followed by a relocated 32-bit displacement to
          // OptoRuntime::rethrow_stub().  The displacement is computed after
          // the opcode has been emitted: target - insts_end() - 4, the 4
          // being the size of the displacement field itself.
 2731     cbuf.set_insts_mark();
 2732     emit_opcode(cbuf, 0xE9); // jmp entry
 2733     emit_d32_reloc(cbuf,
 2734                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
 2735                    runtime_call_Relocation::spec(),
 2736                    RELOC_DISP32);
 2737   %}
 2738 
 2739 %}
 2740 
 2741 
 2742 
 2743 //----------FRAME--------------------------------------------------------------
 2744 // Definition of frame structure and management information.
 2745 //
 2746 //  S T A C K   L A Y O U T    Allocators stack-slot number
 2747 //                             |   (to get allocators register number
 2748 //  G  Owned by    |        |  v    add OptoReg::stack0())
 2749 //  r   CALLER     |        |
 2750 //  o     |        +--------+      pad to even-align allocators stack-slot
 2751 //  w     V        |  pad0  |        numbers; owned by CALLER
 2752 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 2753 //  h     ^        |   in   |  5
 2754 //        |        |  args  |  4   Holes in incoming args owned by SELF
 2755 //  |     |        |        |  3
 2756 //  |     |        +--------+
 2757 //  V     |        | old out|      Empty on Intel, window on Sparc
 2758 //        |    old |preserve|      Must be even aligned.
 2759 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 2760 //        |        |   in   |  3   area for Intel ret address
 2761 //     Owned by    |preserve|      Empty on Sparc.
 2762 //       SELF      +--------+
 2763 //        |        |  pad2  |  2   pad to align old SP
 2764 //        |        +--------+  1
 2765 //        |        | locks  |  0
 2766 //        |        +--------+----> OptoReg::stack0(), even aligned
 2767 //        |        |  pad1  | 11   pad to align new SP
 2768 //        |        +--------+
 2769 //        |        |        | 10
 2770 //        |        | spills |  9   spills
 2771 //        V        |        |  8   (pad0 slot for callee)
 2772 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 2773 //        ^        |  out   |  7
 2774 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 2775 //     Owned by    +--------+
 2776 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 2777 //        |    new |preserve|      Must be even-aligned.
 2778 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 2779 //        |        |        |
 2780 //
 2781 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 2782 //         known from SELF's arguments and the Java calling convention.
 2783 //         Region 6-7 is determined per call site.
 2784 // Note 2: If the calling convention leaves holes in the incoming argument
 2785 //         area, those holes are owned by SELF.  Holes in the outgoing area
 2786 //         are owned by the CALLEE.  Holes should not be necessary in the
 2787 //         incoming area, as the Java calling convention is completely under
 2788 //         the control of the AD file.  Doubles can be sorted and packed to
 2789 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 2790 //         varargs C calling conventions.
 2791 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 2792 //         even aligned with pad0 as needed.
 2793 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 2794 //         region 6-11 is even aligned; it may be padded out more so that
 2795 //         the region from SP to FP meets the minimum stack alignment.
 2796 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 2797 //         alignment.  Region 11, pad1, may be dynamically extended so that
 2798 //         SP meets the minimum alignment.
 2799 
 2800 frame
 2801 %{
 2802   // Register that is part of the calling convention
 2803   // between compiled code and the interpreter.
 2804   inline_cache_reg(RAX);                // Inline Cache Register
 2805 
 2806   // Optional: name the operand used by cisc-spilling to access
 2807   // [stack_pointer + offset]
 2808   cisc_spilling_operand_name(indOffset32);
 2809 
 2810   // Number of stack slots consumed by locking an object
 2811   sync_stack_slots(2);
 2812 
 2813   // Compiled code's Frame Pointer
 2814   frame_pointer(RSP);
 2815 
 2816   // Interpreter stores its frame pointer in a register which is
 2817   // stored to the stack by I2CAdaptors.
 2818   // I2CAdaptors convert from interpreted java to compiled java.
 2819   interpreter_frame_pointer(RBP);
 2820 
 2821   // Stack alignment requirement
 2822   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 2823 
 2824   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 2825   // for calls to C.  Supports the var-args backing area for register parms.
 2826   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 2827 
 2828   // The after-PROLOG location of the return address.  Location of
 2829   // return address specifies a type (REG or STACK) and a number
 2830   // representing the register number (i.e. - use a register name) or
 2831   // stack slot.
 2832   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 2833   // Otherwise, it is above the locks and verification slot and alignment word
        // The slot is computed as: (in_preserve_stack_slots + fixed_slots)
        // rounded up to stack_alignment_in_slots(), minus 2.
 2834   return_addr(STACK - 2 +
 2835               align_up((Compile::current()->in_preserve_stack_slots() +
 2836                         Compile::current()->fixed_slots()),
 2837                        stack_alignment_in_slots()));
 2838 
 2839   // Location of compiled Java return values.  Same as C for now.
        // Maps ideal_reg to an OptoRegPair built from the hi[] and lo[] tables
        // below, both indexed by ideal_reg (OptoReg::Bad where no high entry
        // is listed).
 2840   return_value
 2841   %{
 2842     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 2843            "only return normal values");
 2844 
          // NOTE(review): both tables carry an Op_RegN entry ahead of Op_RegI,
          // which the assert above appears to exclude -- confirm the ordering
          // of the Op_Reg* constants.
 2845     static const int lo[Op_RegL + 1] = {
 2846       0,
 2847       0,
 2848       RAX_num,  // Op_RegN
 2849       RAX_num,  // Op_RegI
 2850       RAX_num,  // Op_RegP
 2851       XMM0_num, // Op_RegF
 2852       XMM0_num, // Op_RegD
 2853       RAX_num   // Op_RegL
 2854     };
 2855     static const int hi[Op_RegL + 1] = {
 2856       0,
 2857       0,
 2858       OptoReg::Bad, // Op_RegN
 2859       OptoReg::Bad, // Op_RegI
 2860       RAX_H_num,    // Op_RegP
 2861       OptoReg::Bad, // Op_RegF
 2862       XMM0b_num,    // Op_RegD
 2863       RAX_H_num     // Op_RegL
 2864     };
 2865     // Excluded flags and vector registers.
 2866     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 2867     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 2868   %}
 2869 %}
 2870 
 2871 //----------ATTRIBUTES---------------------------------------------------------
 2872 //----------Operand Attributes-------------------------------------------------
 2873 op_attrib op_cost(0);        // Required cost attribute, declared with value 0
 2874 
 2875 //----------Instruction Attributes---------------------------------------------
 2876 ins_attrib ins_cost(100);       // Required cost attribute, declared with value 100
 2877 ins_attrib ins_size(8);         // Required size attribute (in bits)
 2878 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 2879                                 // a non-matching short branch variant
 2880                                 // of some long branch?
 2881 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 2882                                 // be a power of 2) specifies the
 2883                                 // alignment that some part of the
 2884                                 // instruction (not necessarily the
 2885                                 // start) requires.  If > 1, a
 2886                                 // compute_padding() function must be
 2887                                 // provided for the instruction
 2888 
 2889 //----------OPERANDS-----------------------------------------------------------
 2890 // Operand definitions must precede instruction definitions for correct parsing
 2891 // in the ADLC because operands constitute user defined types which are used in
 2892 // instruction definitions.
 2893 
 2894 //----------Simple Operands----------------------------------------------------
 2895 // Immediate Operands
 2896 // Integer Immediate: matches any ConI node (no predicate), op_cost 10.
 2897 operand immI()
 2898 %{
 2899   match(ConI);
 2900 
 2901   op_cost(10);
 2902   format %{ %}
 2903   interface(CONST_INTER);
 2904 %}
 2905 
 2906 // Constant for test vs zero: matches a ConI whose value is exactly 0.
 2907 operand immI_0()
 2908 %{
 2909   predicate(n->get_int() == 0);
 2910   match(ConI);
 2911 
 2912   op_cost(0);
 2913   format %{ %}
 2914   interface(CONST_INTER);
 2915 %}
 2916 
 2917 // Constant for increment: matches a ConI whose value is exactly 1.
 2918 operand immI_1()
 2919 %{
 2920   predicate(n->get_int() == 1);
 2921   match(ConI);
 2922 
 2923   op_cost(0);
 2924   format %{ %}
 2925   interface(CONST_INTER);
 2926 %}
 2927 
 2928 // Constant for decrement: matches a ConI whose value is exactly -1.
 2929 operand immI_M1()
 2930 %{
 2931   predicate(n->get_int() == -1);
 2932   match(ConI);
 2933 
 2934   op_cost(0);
 2935   format %{ %}
 2936   interface(CONST_INTER);
 2937 %}
 2938 
 2939 operand immI_2()
 2940 %{
        // Matches a ConI whose value is exactly 2.
 2941   predicate(n->get_int() == 2);
 2942   match(ConI);
 2943 
 2944   op_cost(0);
 2945   format %{ %}
 2946   interface(CONST_INTER);
 2947 %}
 2948 
 2949 operand immI_4()
 2950 %{
        // Matches a ConI whose value is exactly 4.
 2951   predicate(n->get_int() == 4);
 2952   match(ConI);
 2953 
 2954   op_cost(0);
 2955   format %{ %}
 2956   interface(CONST_INTER);
 2957 %}
 2958 
 2959 operand immI_8()
 2960 %{
        // Matches a ConI whose value is exactly 8.
 2961   predicate(n->get_int() == 8);
 2962   match(ConI);
 2963 
 2964   op_cost(0);
 2965   format %{ %}
 2966   interface(CONST_INTER);
 2967 %}
 2968 
 2969 // Valid scale values for addressing modes: a ConI in the range [0, 3].
 2970 operand immI2()
 2971 %{
 2972   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 2973   match(ConI);
 2974 
 2975   format %{ %}
 2976   interface(CONST_INTER);
 2977 %}
 2978 
 2979 operand immU7()
 2980 %{
        // Matches a ConI in the range [0, 0x7F].
 2981   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 2982   match(ConI);
 2983 
 2984   op_cost(5);
 2985   format %{ %}
 2986   interface(CONST_INTER);
 2987 %}
 2988 
 2989 operand immI8()
 2990 %{
        // Matches a ConI in the range [-0x80, 0x7F] (upper bound 0x80 is exclusive).
 2991   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 2992   match(ConI);
 2993 
 2994   op_cost(5);
 2995   format %{ %}
 2996   interface(CONST_INTER);
 2997 %}
 2998 
 2999 operand immU8()
 3000 %{
        // Matches a ConI in the range [0, 255].
 3001   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3002   match(ConI);
 3003 
 3004   op_cost(5);
 3005   format %{ %}
 3006   interface(CONST_INTER);
 3007 %}
 3008 
 3009 operand immI16()
 3010 %{
        // Matches a ConI in the range [-32768, 32767].
 3011   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3012   match(ConI);
 3013 
 3014   op_cost(10);
 3015   format %{ %}
 3016   interface(CONST_INTER);
 3017 %}
 3018 
 3019 // Int Immediate non-negative: matches a ConI whose value is >= 0.
 3020 operand immU31()
 3021 %{
 3022   predicate(n->get_int() >= 0);
 3023   match(ConI);
 3024 
 3025   op_cost(0);
 3026   format %{ %}
 3027   interface(CONST_INTER);
 3028 %}
 3029 
 3030 // Constant for long shifts: matches a ConI whose value is exactly 32.
 3031 operand immI_32()
 3032 %{
 3033   predicate( n->get_int() == 32 );
 3034   match(ConI);
 3035 
 3036   op_cost(0);
 3037   format %{ %}
 3038   interface(CONST_INTER);
 3039 %}
 3040 
 3041 // Constant for long shifts: matches a ConI whose value is exactly 64.
 3042 operand immI_64()
 3043 %{
 3044   predicate( n->get_int() == 64 );
 3045   match(ConI);
 3046 
 3047   op_cost(0);
 3048   format %{ %}
 3049   interface(CONST_INTER);
 3050 %}
 3051 
 3052 // Pointer Immediate: matches any ConP node (no predicate), op_cost 10.
 3053 operand immP()
 3054 %{
 3055   match(ConP);
 3056 
 3057   op_cost(10);
 3058   format %{ %}
 3059   interface(CONST_INTER);
 3060 %}
 3061 
 3062 // NULL Pointer Immediate: matches a ConP whose get_ptr() value is 0.
 3063 operand immP0()
 3064 %{
 3065   predicate(n->get_ptr() == 0);
 3066   match(ConP);
 3067 
 3068   op_cost(5);
 3069   format %{ %}
 3070   interface(CONST_INTER);
 3071 %}
 3072 
 3073 // Narrow-constant (ConN) Immediate: matches any ConN node (no predicate).
 3074 operand immN() %{
 3075   match(ConN);
 3076 
 3077   op_cost(10);
 3078   format %{ %}
 3079   interface(CONST_INTER);
 3080 %}
 3081 
 3082 operand immNKlass() %{
        // Matches any ConNKlass node (no predicate), op_cost 10.
 3083   match(ConNKlass);
 3084 
 3085   op_cost(10);
 3086   format %{ %}
 3087   interface(CONST_INTER);
 3088 %}
 3089 
 3090 // NULL Pointer Immediate (ConN form): matches a ConN whose get_narrowcon() is 0.
 3091 operand immN0() %{
 3092   predicate(n->get_narrowcon() == 0);
 3093   match(ConN);
 3094 
 3095   op_cost(5);
 3096   format %{ %}
 3097   interface(CONST_INTER);
 3098 %}
 3099 
 3100 operand immP31()
 3101 %{
        // Matches a ConP whose type carries no relocation (relocInfo::none)
        // and whose get_ptr() value shifted right by 31 is 0.
 3102   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 3103             && (n->get_ptr() >> 31) == 0);
 3104   match(ConP);
 3105 
 3106   op_cost(5);
 3107   format %{ %}
 3108   interface(CONST_INTER);
 3109 %}
 3110 
 3111 
 3112 // Long Immediate: matches any ConL node (no predicate), op_cost 20.
 3113 operand immL()
 3114 %{
 3115   match(ConL);
 3116 
 3117   op_cost(20);
 3118   format %{ %}
 3119   interface(CONST_INTER);
 3120 %}
 3121 
 3122 // Long Immediate 8-bit: matches a ConL in the range [-0x80, 0x7F].
 3123 operand immL8()
 3124 %{
 3125   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 3126   match(ConL);
 3127 
 3128   op_cost(5);
 3129   format %{ %}
 3130   interface(CONST_INTER);
 3131 %}
 3132 
 3133 // Long Immediate 32-bit unsigned: matches a ConL whose value is unchanged
      // by a cast to unsigned int.
 3134 operand immUL32()
 3135 %{
 3136   predicate(n->get_long() == (unsigned int) (n->get_long()));
 3137   match(ConL);
 3138 
 3139   op_cost(10);
 3140   format %{ %}
 3141   interface(CONST_INTER);
 3142 %}
 3143 
 3144 // Long Immediate 32-bit signed: matches a ConL whose value is unchanged
      // by a cast to int.
 3145 operand immL32()
 3146 %{
 3147   predicate(n->get_long() == (int) (n->get_long()));
 3148   match(ConL);
 3149 
 3150   op_cost(15);
 3151   format %{ %}
 3152   interface(CONST_INTER);
 3153 %}
 3154 
 3155 operand immL_Pow2()
 3156 %{
        // Matches a ConL whose value, cast to julong, satisfies is_power_of_2().
 3157   predicate(is_power_of_2((julong)n->get_long()));
 3158   match(ConL);
 3159 
 3160   op_cost(15);
 3161   format %{ %}
 3162   interface(CONST_INTER);
 3163 %}
 3164 
 3165 operand immL_NotPow2()
 3166 %{
        // Matches a ConL whose bitwise complement, cast to julong, satisfies
        // is_power_of_2().
 3167   predicate(is_power_of_2((julong)~n->get_long()));
 3168   match(ConL);
 3169 
 3170   op_cost(15);
 3171   format %{ %}
 3172   interface(CONST_INTER);
 3173 %}
 3174 
 3175 // Long Immediate zero: matches a ConL whose value is exactly 0.
 3176 operand immL0()
 3177 %{
 3178   predicate(n->get_long() == 0L);
 3179   match(ConL);
 3180 
 3181   op_cost(10);
 3182   format %{ %}
 3183   interface(CONST_INTER);
 3184 %}
 3185 
 3186 // Constant for increment: matches a ConL whose value is exactly 1.
 3187 operand immL1()
 3188 %{
 3189   predicate(n->get_long() == 1);
 3190   match(ConL);
 3191 
 3192   format %{ %}
 3193   interface(CONST_INTER);
 3194 %}
 3195 
 3196 // Constant for decrement: matches a ConL whose value is exactly -1.
 3197 operand immL_M1()
 3198 %{
 3199   predicate(n->get_long() == -1);
 3200   match(ConL);
 3201 
 3202   format %{ %}
 3203   interface(CONST_INTER);
 3204 %}
 3205 
 3206 // Long Immediate: the value 10 (matches a ConL equal to 10).
 3207 operand immL10()
 3208 %{
 3209   predicate(n->get_long() == 10);
 3210   match(ConL);
 3211 
 3212   format %{ %}
 3213   interface(CONST_INTER);
 3214 %}
 3215 
 3216 // Long immediate from 0 to 127: matches a ConL in the range [0, 0x7F].
 3217 // Used for a shorter form of long mul by 10.
 3218 operand immL_127()
 3219 %{
 3220   predicate(0 <= n->get_long() && n->get_long() < 0x80);
 3221   match(ConL);
 3222 
 3223   op_cost(10);
 3224   format %{ %}
 3225   interface(CONST_INTER);
 3226 %}
 3227 
 3228 // Long Immediate: low 32-bit mask, i.e. a ConL equal to 0xFFFFFFFF.
 3229 operand immL_32bits()
 3230 %{
 3231   predicate(n->get_long() == 0xFFFFFFFFL);
 3232   match(ConL);
 3233   op_cost(20);
 3234 
 3235   format %{ %}
 3236   interface(CONST_INTER);
 3237 %}
 3238 
 3239 // Int Immediate: 2^n-1, positive
      // Matches a ConI that is > 0 and whose value plus one satisfies
      // is_power_of_2().
      // NOTE(review): if get_int() yields a signed 32-bit value, the "+ 1"
      // would overflow for the largest int -- confirm how that case behaves.
 3240 operand immI_Pow2M1()
 3241 %{
 3242   predicate((n->get_int() > 0)
 3243             && is_power_of_2(n->get_int() + 1));
 3244   match(ConI);
 3245 
 3246   op_cost(20);
 3247   format %{ %}
 3248   interface(CONST_INTER);
 3249 %}
 3250 
 3251 // Float Immediate zero: matches a ConF for which jint_cast(getf()) is 0.
 3252 operand immF0()
 3253 %{
 3254   predicate(jint_cast(n->getf()) == 0);
 3255   match(ConF);
 3256 
 3257   op_cost(5);
 3258   format %{ %}
 3259   interface(CONST_INTER);
 3260 %}
 3261 
 3262 // Float Immediate: matches any ConF node (no predicate), op_cost 15.
 3263 operand immF()
 3264 %{
 3265   match(ConF);
 3266 
 3267   op_cost(15);
 3268   format %{ %}
 3269   interface(CONST_INTER);
 3270 %}
 3271 
 3272 // Double Immediate zero: matches a ConD for which jlong_cast(getd()) is 0.
 3273 operand immD0()
 3274 %{
 3275   predicate(jlong_cast(n->getd()) == 0);
 3276   match(ConD);
 3277 
 3278   op_cost(5);
 3279   format %{ %}
 3280   interface(CONST_INTER);
 3281 %}
 3282 
 3283 // Double Immediate: matches any ConD node (no predicate), op_cost 15.
 3284 operand immD()
 3285 %{
 3286   match(ConD);
 3287 
 3288   op_cost(15);
 3289   format %{ %}
 3290   interface(CONST_INTER);
 3291 %}
 3292 
 3293 // Immediates for special shifts (sign extend)
 3294 
 3295 // Special-shift constant: matches a ConI whose value is exactly 16.
 3296 operand immI_16()
 3297 %{
 3298   predicate(n->get_int() == 16);
 3299   match(ConI);
 3300 
 3301   format %{ %}
 3302   interface(CONST_INTER);
 3303 %}
 3304 
 3305 operand immI_24()
 3306 %{
        // Matches a ConI whose value is exactly 24.
 3307   predicate(n->get_int() == 24);
 3308   match(ConI);
 3309 
 3310   format %{ %}
 3311   interface(CONST_INTER);
 3312 %}
 3313 
 3314 // Constant for byte-wide masking: matches a ConI whose value is exactly 255.
 3315 operand immI_255()
 3316 %{
 3317   predicate(n->get_int() == 255);
 3318   match(ConI);
 3319 
 3320   format %{ %}
 3321   interface(CONST_INTER);
 3322 %}
 3323 
 3324 // Constant for short-wide masking: matches a ConI whose value is exactly 65535.
 3325 operand immI_65535()
 3326 %{
 3327   predicate(n->get_int() == 65535);
 3328   match(ConI);
 3329 
 3330   format %{ %}
 3331   interface(CONST_INTER);
 3332 %}
 3333 
 3334 // Constant for byte-wide masking: matches a ConL whose value is exactly 255.
 3335 operand immL_255()
 3336 %{
 3337   predicate(n->get_long() == 255);
 3338   match(ConL);
 3339 
 3340   format %{ %}
 3341   interface(CONST_INTER);
 3342 %}
 3343 
 3344 // Constant for short-wide masking: matches a ConL whose value is exactly 65535.
 3345 operand immL_65535()
 3346 %{
 3347   predicate(n->get_long() == 65535);
 3348   match(ConL);
 3349 
 3350   format %{ %}
 3351   interface(CONST_INTER);
 3352 %}
 3353 
 3354 operand kReg()
 3355 %{
        // RegVectMask operand allocated from the vectmask_reg class.
 3356   constraint(ALLOC_IN_RC(vectmask_reg));
 3357   match(RegVectMask);
 3358   format %{%}
 3359   interface(REG_INTER);
 3360 %}
 3361 
 3362 operand kReg_K1()
 3363 %{
        // RegVectMask operand allocated from the vectmask_reg_K1 class.
 3364   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3365   match(RegVectMask);
 3366   format %{%}
 3367   interface(REG_INTER);
 3368 %}
 3369 
 3370 operand kReg_K2()
 3371 %{
        // RegVectMask operand allocated from the vectmask_reg_K2 class.
 3372   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3373   match(RegVectMask);
 3374   format %{%}
 3375   interface(REG_INTER);
 3376 %}
 3377 
 3378 // Special Registers: RegVectMask operand allocated from the vectmask_reg_K3 class.
 3379 operand kReg_K3()
 3380 %{
 3381   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3382   match(RegVectMask);
 3383   format %{%}
 3384   interface(REG_INTER);
 3385 %}
 3386 
 3387 operand kReg_K4()
 3388 %{
        // RegVectMask operand allocated from the vectmask_reg_K4 class.
 3389   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3390   match(RegVectMask);
 3391   format %{%}
 3392   interface(REG_INTER);
 3393 %}
 3394 
 3395 operand kReg_K5()
 3396 %{
        // RegVectMask operand allocated from the vectmask_reg_K5 class.
 3397   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3398   match(RegVectMask);
 3399   format %{%}
 3400   interface(REG_INTER);
 3401 %}
 3402 
 3403 operand kReg_K6()
 3404 %{
        // RegVectMask operand allocated from the vectmask_reg_K6 class.
 3405   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3406   match(RegVectMask);
 3407   format %{%}
 3408   interface(REG_INTER);
 3409 %}
 3410 
 3411 // Special Registers: RegVectMask operand allocated from the vectmask_reg_K7 class.
 3412 operand kReg_K7()
 3413 %{
 3414   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3415   match(RegVectMask);
 3416   format %{%}
 3417   interface(REG_INTER);
 3418 %}
 3419 
 3420 // Register Operands
 3421 // Integer Register: RegI operand allocated from the int_reg class.
      // Also lists the rax/rbx/rcx/rdx/rdi int operands as matches.
 3422 operand rRegI()
 3423 %{
 3424   constraint(ALLOC_IN_RC(int_reg));
 3425   match(RegI);
 3426 
 3427   match(rax_RegI);
 3428   match(rbx_RegI);
 3429   match(rcx_RegI);
 3430   match(rdx_RegI);
 3431   match(rdi_RegI);
 3432 
 3433   format %{ %}
 3434   interface(REG_INTER);
 3435 %}
 3436 
 3437 // Special Registers: RegI operand allocated from int_rax_reg; format string "RAX".
 3438 operand rax_RegI()
 3439 %{
 3440   constraint(ALLOC_IN_RC(int_rax_reg));
 3441   match(RegI);
 3442   match(rRegI);
 3443 
 3444   format %{ "RAX" %}
 3445   interface(REG_INTER);
 3446 %}
 3447 
 3448 // Special Registers: RegI operand allocated from int_rbx_reg; format string "RBX".
 3449 operand rbx_RegI()
 3450 %{
 3451   constraint(ALLOC_IN_RC(int_rbx_reg));
 3452   match(RegI);
 3453   match(rRegI);
 3454 
 3455   format %{ "RBX" %}
 3456   interface(REG_INTER);
 3457 %}
 3458 
 3459 operand rcx_RegI()
 3460 %{
        // RegI operand allocated from int_rcx_reg; format string "RCX".
 3461   constraint(ALLOC_IN_RC(int_rcx_reg));
 3462   match(RegI);
 3463   match(rRegI);
 3464 
 3465   format %{ "RCX" %}
 3466   interface(REG_INTER);
 3467 %}
 3468 
 3469 operand rdx_RegI()
 3470 %{
        // RegI operand allocated from int_rdx_reg; format string "RDX".
 3471   constraint(ALLOC_IN_RC(int_rdx_reg));
 3472   match(RegI);
 3473   match(rRegI);
 3474 
 3475   format %{ "RDX" %}
 3476   interface(REG_INTER);
 3477 %}
 3478 
 3479 operand rdi_RegI()
 3480 %{
        // RegI operand allocated from int_rdi_reg; format string "RDI".
 3481   constraint(ALLOC_IN_RC(int_rdi_reg));
 3482   match(RegI);
 3483   match(rRegI);
 3484 
 3485   format %{ "RDI" %}
 3486   interface(REG_INTER);
 3487 %}
 3488 
 3489 operand no_rax_rdx_RegI()
 3490 %{
        // RegI operand allocated from int_no_rax_rdx_reg; also lists the
        // rbx/rcx/rdi int operands as matches.
 3491   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 3492   match(RegI);
 3493   match(rbx_RegI);
 3494   match(rcx_RegI);
 3495   match(rdi_RegI);
 3496 
 3497   format %{ %}
 3498   interface(REG_INTER);
 3499 %}
 3500 
 3501 operand no_rbp_r13_RegI()
 3502 %{
        // RegI operand allocated from int_no_rbp_r13_reg; also lists rRegI
        // and the rax/rbx/rcx/rdx/rdi int operands as matches.
 3503   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 3504   match(RegI);
 3505   match(rRegI);
 3506   match(rax_RegI);
 3507   match(rbx_RegI);
 3508   match(rcx_RegI);
 3509   match(rdx_RegI);
 3510   match(rdi_RegI);
 3511 
 3512   format %{ %}
 3513   interface(REG_INTER);
 3514 %}
 3515 
 3516 // Pointer Register: RegP operand allocated from the any_reg class.
      // Also lists rRegP and the rax/rbx/rdi/rsi/rbp/r15 pointer operands
      // as matches.
 3517 operand any_RegP()
 3518 %{
 3519   constraint(ALLOC_IN_RC(any_reg));
 3520   match(RegP);
 3521   match(rax_RegP);
 3522   match(rbx_RegP);
 3523   match(rdi_RegP);
 3524   match(rsi_RegP);
 3525   match(rbp_RegP);
 3526   match(r15_RegP);
 3527   match(rRegP);
 3528 
 3529   format %{ %}
 3530   interface(REG_INTER);
 3531 %}
 3532 
 3533 operand rRegP()
 3534 %{
        // RegP operand allocated from the ptr_reg class; also lists the
        // rax/rbx/rdi/rsi/rbp/r15 pointer operands as matches.
 3535   constraint(ALLOC_IN_RC(ptr_reg));
 3536   match(RegP);
 3537   match(rax_RegP);
 3538   match(rbx_RegP);
 3539   match(rdi_RegP);
 3540   match(rsi_RegP);
 3541   match(rbp_RegP);  // See Q&A below about
 3542   match(r15_RegP);  // r15_RegP and rbp_RegP.
 3543 
 3544   format %{ %}
 3545   interface(REG_INTER);
 3546 %}
 3547 
 3548 operand rRegN() %{
        // RegN operand allocated from the int_reg class (the same class
        // rRegI uses).
 3549   constraint(ALLOC_IN_RC(int_reg));
 3550   match(RegN);
 3551 
 3552   format %{ %}
 3553   interface(REG_INTER);
 3554 %}
 3555 
 3556 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 3557 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 3558 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 3559 // The output of an instruction is controlled by the allocator, which respects
 3560 // register class masks, not match rules.  Unless an instruction mentions
 3561 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 3562 // by the allocator as an input.
 3563 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 3564 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 3565 // result, RBP is not included in the output of the instruction either.
 3566 
 3567 operand no_rax_RegP()
 3568 %{
        // RegP operand allocated from ptr_no_rax_reg; also lists the
        // rbx/rsi/rdi pointer operands as matches.
 3569   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
 3570   match(RegP);
 3571   match(rbx_RegP);
 3572   match(rsi_RegP);
 3573   match(rdi_RegP);
 3574 
 3575   format %{ %}
 3576   interface(REG_INTER);
 3577 %}
 3578 
 3579 // This operand is not allowed to use RBP even if
 3580 // RBP is not used to hold the frame pointer.
      // RegP operand allocated from ptr_reg_no_rbp; also lists the
      // rbx/rsi/rdi pointer operands as matches.
 3581 operand no_rbp_RegP()
 3582 %{
 3583   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 3584   match(RegP);
 3585   match(rbx_RegP);
 3586   match(rsi_RegP);
 3587   match(rdi_RegP);
 3588 
 3589   format %{ %}
 3590   interface(REG_INTER);
 3591 %}
 3592 
 3593 operand no_rax_rbx_RegP()
 3594 %{
        // RegP operand allocated from ptr_no_rax_rbx_reg; also lists the
        // rsi/rdi pointer operands as matches.
 3595   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
 3596   match(RegP);
 3597   match(rsi_RegP);
 3598   match(rdi_RegP);
 3599 
 3600   format %{ %}
 3601   interface(REG_INTER);
 3602 %}
 3603 
 3604 // Special Registers
 3605 // Return a pointer value: RegP operand allocated from ptr_rax_reg.
 3606 operand rax_RegP()
 3607 %{
 3608   constraint(ALLOC_IN_RC(ptr_rax_reg));
 3609   match(RegP);
 3610   match(rRegP);
 3611 
 3612   format %{ %}
 3613   interface(REG_INTER);
 3614 %}
 3615 
 3616 // Special Registers
 3617 // Return a compressed pointer value: RegN operand allocated from int_rax_reg.
 3618 operand rax_RegN()
 3619 %{
 3620   constraint(ALLOC_IN_RC(int_rax_reg));
 3621   match(RegN);
 3622   match(rRegN);
 3623 
 3624   format %{ %}
 3625   interface(REG_INTER);
 3626 %}
 3627 
 3628 // Used in AtomicAdd: RegP operand allocated from ptr_rbx_reg.
 3629 operand rbx_RegP()
 3630 %{
 3631   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 3632   match(RegP);
 3633   match(rRegP);
 3634 
 3635   format %{ %}
 3636   interface(REG_INTER);
 3637 %}
 3638 
 3639 operand rsi_RegP()
 3640 %{
        // RegP operand allocated from ptr_rsi_reg.
 3641   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 3642   match(RegP);
 3643   match(rRegP);
 3644 
 3645   format %{ %}
 3646   interface(REG_INTER);
 3647 %}
 3648 
 3649 operand rbp_RegP()
 3650 %{
        // RegP operand allocated from ptr_rbp_reg.
 3651   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 3652   match(RegP);
 3653   match(rRegP);
 3654 
 3655   format %{ %}
 3656   interface(REG_INTER);
 3657 %}
 3658 
 3659 // Used in rep stosq: RegP operand allocated from ptr_rdi_reg.
 3660 operand rdi_RegP()
 3661 %{
 3662   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 3663   match(RegP);
 3664   match(rRegP);
 3665 
 3666   format %{ %}
 3667   interface(REG_INTER);
 3668 %}
 3669 
 3670 operand r15_RegP()
 3671 %{
        // RegP operand allocated from ptr_r15_reg.
 3672   constraint(ALLOC_IN_RC(ptr_r15_reg));
 3673   match(RegP);
 3674   match(rRegP);
 3675 
 3676   format %{ %}
 3677   interface(REG_INTER);
 3678 %}
 3679 
 3680 operand rRegL()
 3681 %{
 3682   constraint(ALLOC_IN_RC(long_reg));
 3683   match(RegL);
 3684   match(rax_RegL);
 3685   match(rdx_RegL);
 3686   // General long register operand (class long_reg); also accepts rax_RegL and rdx_RegL.
 3687   format %{ %}
 3688   interface(REG_INTER);
 3689 %}
 3690 
 3691 // Special Registers: long operand allocated in class long_no_rax_rdx_reg.
 3692 operand no_rax_rdx_RegL()
 3693 %{
 3694   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 3695   match(RegL);
 3696   match(rRegL);
 3697   // Matches a plain RegL as well as the general rRegL operand.
 3698   format %{ %}
 3699   interface(REG_INTER);
 3700 %}
 3701 
 3702 operand rax_RegL()
 3703 %{
 3704   constraint(ALLOC_IN_RC(long_rax_reg));
 3705   match(RegL);
 3706   match(rRegL);
 3707   // Long operand in class long_rax_reg; unlike its siblings it has a non-empty format string.
 3708   format %{ "RAX" %}
 3709   interface(REG_INTER);
 3710 %}
 3711 
 3712 operand rcx_RegL()
 3713 %{
 3714   constraint(ALLOC_IN_RC(long_rcx_reg));
 3715   match(RegL);
 3716   match(rRegL);
 3717   // Long operand allocated in class long_rcx_reg; matches RegL and rRegL.
 3718   format %{ %}
 3719   interface(REG_INTER);
 3720 %}
 3721 
 3722 operand rdx_RegL()
 3723 %{
 3724   constraint(ALLOC_IN_RC(long_rdx_reg));
 3725   match(RegL);
 3726   match(rRegL);
 3727   // Long operand allocated in class long_rdx_reg; matches RegL and rRegL.
 3728   format %{ %}
 3729   interface(REG_INTER);
 3730 %}
 3731 
 3732 operand no_rbp_r13_RegL()
 3733 %{
 3734   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 3735   match(RegL);
 3736   match(rRegL);
 3737   match(rax_RegL);
 3738   match(rcx_RegL);
 3739   match(rdx_RegL);
 3740   // Long operand in class long_no_rbp_r13_reg; also accepts rRegL and the rax/rcx/rdx long operands.
 3741   format %{ %}
 3742   interface(REG_INTER);
 3743 %}
 3744 
 3745 // Flags register, used as output of compare instructions.
 3746 operand rFlagsReg()
 3747 %{
 3748   constraint(ALLOC_IN_RC(int_flags));
 3749   match(RegFlags);
 3750   // Allocated in class int_flags; printed as "RFLAGS".
 3751   format %{ "RFLAGS" %}
 3752   interface(REG_INTER);
 3753 %}
 3754 
 3755 // Flags register, used as output of FLOATING POINT compare instructions
 3756 operand rFlagsRegU()
 3757 %{
 3758   constraint(ALLOC_IN_RC(int_flags));
 3759   match(RegFlags);
 3760   // Same register class and match rule as rFlagsReg; only the operand name and format differ.
 3761   format %{ "RFLAGS_U" %}
 3762   interface(REG_INTER);
 3763 %}
 3764 
 3765 operand rFlagsRegUCF() %{
 3766   constraint(ALLOC_IN_RC(int_flags));
 3767   match(RegFlags);
 3768   predicate(false); // constant-false predicate; presumably only used where an instruct names this operand explicitly - confirm
 3769   // Flags operand in class int_flags, printed as "RFLAGS_U_CF".
 3770   format %{ "RFLAGS_U_CF" %}
 3771   interface(REG_INTER);
 3772 %}
 3773 
 3774 // Float register operand allocated in class float_reg
 3775 operand regF() %{
 3776    constraint(ALLOC_IN_RC(float_reg));
 3777    match(RegF);
 3778 
 3779    format %{ %}
 3780    interface(REG_INTER);
 3781 %}
 3782 
 3783 // Float register operand restricted to class float_reg_legacy
 3784 operand legRegF() %{
 3785    constraint(ALLOC_IN_RC(float_reg_legacy));
 3786    match(RegF);
 3787 
 3788    format %{ %}
 3789    interface(REG_INTER);
 3790 %}
 3791 
 3792 // Float register operand restricted to class float_reg_vl
 3793 operand vlRegF() %{
 3794    constraint(ALLOC_IN_RC(float_reg_vl));
 3795    match(RegF);
 3796 
 3797    format %{ %}
 3798    interface(REG_INTER);
 3799 %}
 3800 
 3801 // Double register operand allocated in class double_reg
 3802 operand regD() %{
 3803    constraint(ALLOC_IN_RC(double_reg));
 3804    match(RegD);
 3805 
 3806    format %{ %}
 3807    interface(REG_INTER);
 3808 %}
 3809 
 3810 // Double register operand restricted to class double_reg_legacy
 3811 operand legRegD() %{
 3812    constraint(ALLOC_IN_RC(double_reg_legacy));
 3813    match(RegD);
 3814 
 3815    format %{ %}
 3816    interface(REG_INTER);
 3817 %}
 3818 
 3819 // Double register operand restricted to class double_reg_vl
 3820 operand vlRegD() %{
 3821    constraint(ALLOC_IN_RC(double_reg_vl));
 3822    match(RegD);
 3823 
 3824    format %{ %}
 3825    interface(REG_INTER);
 3826 %}
 3827 
 3828 //----------Memory Operands----------------------------------------------------
 3829 // Direct Memory Operand
 3830 // operand direct(immP addr)
 3831 // %{
 3832 //   match(addr);
 3833 
 3834 //   format %{ "[$addr]" %}
 3835 //   interface(MEMORY_INTER) %{
 3836 //     base(0xFFFFFFFF);
 3837 //     index(0x4);
 3838 //     scale(0x0);
 3839 //     disp($addr);
 3840 //   %}
 3841 // %}
 3842 
 3843 // Indirect Memory Operand: the address is just the base register, [$reg]
 3844 operand indirect(any_RegP reg)
 3845 %{
 3846   constraint(ALLOC_IN_RC(ptr_reg));
 3847   match(reg);
 3848 
 3849   format %{ "[$reg]" %}
 3850   interface(MEMORY_INTER) %{
 3851     base($reg);
 3852     index(0x4);  // No Index
 3853     scale(0x0);  // No Scale
 3854     disp(0x0);   // No Displacement
 3855   %}
 3856 %}
 3857 
 3858 // Indirect Memory Plus Short Offset Operand: [$reg + $off] with an immL8 offset
 3859 operand indOffset8(any_RegP reg, immL8 off)
 3860 %{
 3861   constraint(ALLOC_IN_RC(ptr_reg));
 3862   match(AddP reg off);
 3863 
 3864   format %{ "[$reg + $off (8-bit)]" %}
 3865   interface(MEMORY_INTER) %{
 3866     base($reg);
 3867     index(0x4);  // No Index
 3868     scale(0x0);  // No Scale
 3869     disp($off);
 3870   %}
 3871 %}
 3872 
 3873 // Indirect Memory Plus Long Offset Operand: [$reg + $off] with an immL32 offset
 3874 operand indOffset32(any_RegP reg, immL32 off)
 3875 %{
 3876   constraint(ALLOC_IN_RC(ptr_reg));
 3877   match(AddP reg off);
 3878 
 3879   format %{ "[$reg + $off (32-bit)]" %}
 3880   interface(MEMORY_INTER) %{
 3881     base($reg);
 3882     index(0x4);  // No Index
 3883     scale(0x0);  // No Scale
 3884     disp($off);
 3885   %}
 3886 %}
 3887 
 3888 // Indirect Memory Plus Index Register Plus Offset Operand: [$reg + $lreg + $off]
 3889 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 3890 %{
 3891   constraint(ALLOC_IN_RC(ptr_reg));
 3892   match(AddP (AddP reg lreg) off);
 3893 
 3894   op_cost(10);
 3895   format %{"[$reg + $off + $lreg]" %}
 3896   interface(MEMORY_INTER) %{
 3897     base($reg);
 3898     index($lreg);
 3899     scale(0x0);  // No Scale
 3900     disp($off);
 3901   %}
 3902 %}
 3903 
 3904 // Indirect Memory Plus Index Register Operand: [$reg + $lreg], no displacement
 3905 operand indIndex(any_RegP reg, rRegL lreg)
 3906 %{
 3907   constraint(ALLOC_IN_RC(ptr_reg));
 3908   match(AddP reg lreg);
 3909 
 3910   op_cost(10);
 3911   format %{"[$reg + $lreg]" %}
 3912   interface(MEMORY_INTER) %{
 3913     base($reg);
 3914     index($lreg);
 3915     scale(0x0);  // No Scale
 3916     disp(0x0);   // No Displacement
 3917   %}
 3918 %}
 3919 
 3920 // Indirect Memory Times Scale Plus Index Register: [$reg + $lreg << $scale]
 3921 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 3922 %{
 3923   constraint(ALLOC_IN_RC(ptr_reg));
 3924   match(AddP reg (LShiftL lreg scale));
 3925 
 3926   op_cost(10);
 3927   format %{"[$reg + $lreg << $scale]" %}
 3928   interface(MEMORY_INTER) %{
 3929     base($reg);
 3930     index($lreg);
 3931     scale($scale);
 3932     disp(0x0);   // No Displacement
 3933   %}
 3934 %}
 3935 
 3936 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 3937 %{
 3938   constraint(ALLOC_IN_RC(ptr_reg));
 3939   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3940   match(AddP reg (LShiftL (ConvI2L idx) scale));
 3941   // Scaled int index: requires _lo >= 0 on the long type of n->in(3)->in(1) (presumably the ConvI2L node - confirm).
 3942   op_cost(10);
 3943   format %{"[$reg + pos $idx << $scale]" %}
 3944   interface(MEMORY_INTER) %{
 3945     base($reg);
 3946     index($idx);
 3947     scale($scale);
 3948     disp(0x0);   // No Displacement
 3949   %}
 3950 %}
 3951 
 3952 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [$reg + $lreg << $scale + $off]
 3953 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 3954 %{
 3955   constraint(ALLOC_IN_RC(ptr_reg));
 3956   match(AddP (AddP reg (LShiftL lreg scale)) off);
 3957 
 3958   op_cost(10);
 3959   format %{"[$reg + $off + $lreg << $scale]" %}
 3960   interface(MEMORY_INTER) %{
 3961     base($reg);
 3962     index($lreg);
 3963     scale($scale);
 3964     disp($off);
 3965   %}
 3966 %}
 3967 
 3968 // Indirect Memory Plus Positive Index Register Plus Offset Operand: [$reg + $idx + $off], int index
 3969 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 3970 %{
 3971   constraint(ALLOC_IN_RC(ptr_reg));
 3972   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 3973   match(AddP (AddP reg (ConvI2L idx)) off);
 3974   // Requires _lo >= 0 on the long type of n->in(2)->in(3) (presumably the ConvI2L node - confirm).
 3975   op_cost(10);
 3976   format %{"[$reg + $off + $idx]" %}
 3977   interface(MEMORY_INTER) %{
 3978     base($reg);
 3979     index($idx);
 3980     scale(0x0);  // No Scale
 3981     disp($off);
 3982   %}
 3983 %}
 3984 
 3985 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand (int index)
 3986 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3987 %{
 3988   constraint(ALLOC_IN_RC(ptr_reg));
 3989   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3990   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3991   // Requires _lo >= 0 on the long type of n->in(2)->in(3)->in(1) (presumably the ConvI2L node - confirm).
 3992   op_cost(10);
 3993   format %{"[$reg + $off + $idx << $scale]" %}
 3994   interface(MEMORY_INTER) %{
 3995     base($reg);
 3996     index($idx);
 3997     scale($scale);
 3998     disp($off);
 3999   %}
 4000 %}
 4001 
 4002 // Indirect Narrow Oop Operand: matches DecodeN directly as the address [R12 + $reg << 3]
 4003 operand indCompressedOop(rRegN reg) %{
 4004   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4005   constraint(ALLOC_IN_RC(ptr_reg));
 4006   match(DecodeN reg);
 4007   // Only applies when the compressed-oop shift equals Address::times_8, matching scale(0x3) below.
 4008   op_cost(10);
 4009   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 4010   interface(MEMORY_INTER) %{
 4011     base(0xc); // R12
 4012     index($reg);
 4013     scale(0x3);
 4014     disp(0x0);
 4015   %}
 4016 %}
 4017 
 4018 // Indirect Narrow Oop Plus Offset Operand
 4019 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
 4020 // so we can't free r12 even with CompressedOops::base() == NULL.
 4021 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 4022   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4023   constraint(ALLOC_IN_RC(ptr_reg));
 4024   match(AddP (DecodeN reg) off);
 4025   // Only applies when the compressed-oop shift equals Address::times_8, matching scale(0x3) below.
 4026   op_cost(10);
 4027   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4028   interface(MEMORY_INTER) %{
 4029     base(0xc); // R12
 4030     index($reg);
 4031     scale(0x3);
 4032     disp($off);
 4033   %}
 4034 %}
 4035 
 4036 // Indirect Memory Operand on a narrow oop: with a zero shift the narrow register is the base
 4037 operand indirectNarrow(rRegN reg)
 4038 %{
 4039   predicate(CompressedOops::shift() == 0);
 4040   constraint(ALLOC_IN_RC(ptr_reg));
 4041   match(DecodeN reg);
 4042 
 4043   format %{ "[$reg]" %}
 4044   interface(MEMORY_INTER) %{
 4045     base($reg);
 4046     index(0x4);  // No Index
 4047     scale(0x0);  // No Scale
 4048     disp(0x0);   // No Displacement
 4049   %}
 4050 %}
 4051 
 4052 // Indirect Memory Plus Short Offset Operand on a narrow oop (compressed-oop shift of 0)
 4053 operand indOffset8Narrow(rRegN reg, immL8 off)
 4054 %{
 4055   predicate(CompressedOops::shift() == 0);
 4056   constraint(ALLOC_IN_RC(ptr_reg));
 4057   match(AddP (DecodeN reg) off);
 4058 
 4059   format %{ "[$reg + $off (8-bit)]" %}
 4060   interface(MEMORY_INTER) %{
 4061     base($reg);
 4062     index(0x4);  // No Index
 4063     scale(0x0);  // No Scale
 4064     disp($off);
 4065   %}
 4066 %}
 4067 
 4068 // Indirect Memory Plus Long Offset Operand on a narrow oop (compressed-oop shift of 0)
 4069 operand indOffset32Narrow(rRegN reg, immL32 off)
 4070 %{
 4071   predicate(CompressedOops::shift() == 0);
 4072   constraint(ALLOC_IN_RC(ptr_reg));
 4073   match(AddP (DecodeN reg) off);
 4074 
 4075   format %{ "[$reg + $off (32-bit)]" %}
 4076   interface(MEMORY_INTER) %{
 4077     base($reg);
 4078     index(0x4);  // No Index
 4079     scale(0x0);  // No Scale
 4080     disp($off);
 4081   %}
 4082 %}
 4083 
 4084 // Indirect Memory Plus Index Register Plus Offset Operand on a narrow oop (compressed-oop shift of 0)
 4085 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 4086 %{
 4087   predicate(CompressedOops::shift() == 0);
 4088   constraint(ALLOC_IN_RC(ptr_reg));
 4089   match(AddP (AddP (DecodeN reg) lreg) off);
 4090 
 4091   op_cost(10);
 4092   format %{"[$reg + $off + $lreg]" %}
 4093   interface(MEMORY_INTER) %{
 4094     base($reg);
 4095     index($lreg);
 4096     scale(0x0);  // No Scale
 4097     disp($off);
 4098   %}
 4099 %}
 4100 
 4101 // Indirect Memory Plus Index Register Operand on a narrow oop (compressed-oop shift of 0), no displacement
 4102 operand indIndexNarrow(rRegN reg, rRegL lreg)
 4103 %{
 4104   predicate(CompressedOops::shift() == 0);
 4105   constraint(ALLOC_IN_RC(ptr_reg));
 4106   match(AddP (DecodeN reg) lreg);
 4107 
 4108   op_cost(10);
 4109   format %{"[$reg + $lreg]" %}
 4110   interface(MEMORY_INTER) %{
 4111     base($reg);
 4112     index($lreg);
 4113     scale(0x0);  // No Scale
 4114     disp(0x0);   // No Displacement
 4115   %}
 4116 %}
 4117 
 4118 // Indirect Memory Times Scale Plus Index Register on a narrow oop (compressed-oop shift of 0)
 4119 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 4120 %{
 4121   predicate(CompressedOops::shift() == 0);
 4122   constraint(ALLOC_IN_RC(ptr_reg));
 4123   match(AddP (DecodeN reg) (LShiftL lreg scale));
 4124 
 4125   op_cost(10);
 4126   format %{"[$reg + $lreg << $scale]" %}
 4127   interface(MEMORY_INTER) %{
 4128     base($reg);
 4129     index($lreg);
 4130     scale($scale);
 4131     disp(0x0);   // No Displacement
 4132   %}
 4133 %}
 4134 
 4135 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand on a narrow oop (shift of 0)
 4136 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 4137 %{
 4138   predicate(CompressedOops::shift() == 0);
 4139   constraint(ALLOC_IN_RC(ptr_reg));
 4140   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 4141 
 4142   op_cost(10);
 4143   format %{"[$reg + $off + $lreg << $scale]" %}
 4144   interface(MEMORY_INTER) %{
 4145     base($reg);
 4146     index($lreg);
 4147     scale($scale);
 4148     disp($off);
 4149   %}
 4150 %}
 4151 
 4152 // Indirect Memory Plus Positive Index Register Plus Offset Operand on a narrow oop (shift of 0)
 4153 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 4154 %{
 4155   constraint(ALLOC_IN_RC(ptr_reg));
 4156   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 4157   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 4158   // Also requires _lo >= 0 on the long type of n->in(2)->in(3) (presumably the ConvI2L node - confirm).
 4159   op_cost(10);
 4160   format %{"[$reg + $off + $idx]" %}
 4161   interface(MEMORY_INTER) %{
 4162     base($reg);
 4163     index($idx);
 4164     scale(0x0);  // No Scale
 4165     disp($off);
 4166   %}
 4167 %}
 4168 
 4169 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand on a narrow oop (shift of 0)
 4170 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 4171 %{
 4172   constraint(ALLOC_IN_RC(ptr_reg));
 4173   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 4174   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 4175   // Also requires _lo >= 0 on the long type of n->in(2)->in(3)->in(1) (presumably the ConvI2L node - confirm).
 4176   op_cost(10);
 4177   format %{"[$reg + $off + $idx << $scale]" %}
 4178   interface(MEMORY_INTER) %{
 4179     base($reg);
 4180     index($idx);
 4181     scale($scale);
 4182     disp($off);
 4183   %}
 4184 %}
 4185 
 4186 //----------Special Memory Operands--------------------------------------------
 4187 // Stack Slot Operand - This operand is used for loading and storing temporary
 4188 //                      values on the stack where a match requires a value to
 4189 //                      flow through memory.
 4190 operand stackSlotP(sRegP reg) // pointer-typed stack slot, addressed as [RSP + $reg]
 4191 %{
 4192   constraint(ALLOC_IN_RC(stack_slots));
 4193   // No match rule because this operand is only generated in matching
 4194 
 4195   format %{ "[$reg]" %}
 4196   interface(MEMORY_INTER) %{
 4197     base(0x4);   // RSP
 4198     index(0x4);  // No Index
 4199     scale(0x0);  // No Scale
 4200     disp($reg);  // Stack Offset
 4201   %}
 4202 %}
 4203 
 4204 operand stackSlotI(sRegI reg)
 4205 %{
 4206   constraint(ALLOC_IN_RC(stack_slots));
 4207   // No match rule because this operand is only generated in matching
 4208   // Stack slot for an sRegI value, addressed as [RSP + $reg].
 4209   format %{ "[$reg]" %}
 4210   interface(MEMORY_INTER) %{
 4211     base(0x4);   // RSP
 4212     index(0x4);  // No Index
 4213     scale(0x0);  // No Scale
 4214     disp($reg);  // Stack Offset
 4215   %}
 4216 %}
 4217 
 4218 operand stackSlotF(sRegF reg)
 4219 %{
 4220   constraint(ALLOC_IN_RC(stack_slots));
 4221   // No match rule because this operand is only generated in matching
 4222   // Stack slot for an sRegF value, addressed as [RSP + $reg].
 4223   format %{ "[$reg]" %}
 4224   interface(MEMORY_INTER) %{
 4225     base(0x4);   // RSP
 4226     index(0x4);  // No Index
 4227     scale(0x0);  // No Scale
 4228     disp($reg);  // Stack Offset
 4229   %}
 4230 %}
 4231 
 4232 operand stackSlotD(sRegD reg)
 4233 %{
 4234   constraint(ALLOC_IN_RC(stack_slots));
 4235   // No match rule because this operand is only generated in matching
 4236   // Stack slot for an sRegD value, addressed as [RSP + $reg].
 4237   format %{ "[$reg]" %}
 4238   interface(MEMORY_INTER) %{
 4239     base(0x4);   // RSP
 4240     index(0x4);  // No Index
 4241     scale(0x0);  // No Scale
 4242     disp($reg);  // Stack Offset
 4243   %}
 4244 %}
 4245 operand stackSlotL(sRegL reg)
 4246 %{
 4247   constraint(ALLOC_IN_RC(stack_slots));
 4248   // No match rule because this operand is only generated in matching
 4249   // Stack slot for an sRegL value, addressed as [RSP + $reg].
 4250   format %{ "[$reg]" %}
 4251   interface(MEMORY_INTER) %{
 4252     base(0x4);   // RSP
 4253     index(0x4);  // No Index
 4254     scale(0x0);  // No Scale
 4255     disp($reg);  // Stack Offset
 4256   %}
 4257 %}
 4258 
 4259 //----------Conditional Branch Operands----------------------------------------
 4260 // Comparison Op  - This is the operation of the comparison, and is limited to
 4261 //                  the following set of codes:
 4262 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4263 //
 4264 // Other attributes of the comparison, such as unsignedness, are specified
 4265 // by the comparison instruction that sets a condition code flags register.
 4266 // That result is represented by a flags operand whose subtype is appropriate
 4267 // to the unsignedness (etc.) of the comparison.
 4268 //
 4269 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4270 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4271 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4272 
 4273 // Comparison Code: maps each Bool test to a condition code and its mnemonic suffix (l/ge/le/g forms)
 4274 operand cmpOp()
 4275 %{
 4276   match(Bool);
 4277 
 4278   format %{ "" %}
 4279   interface(COND_INTER) %{
 4280     equal(0x4, "e");
 4281     not_equal(0x5, "ne");
 4282     less(0xC, "l");
 4283     greater_equal(0xD, "ge");
 4284     less_equal(0xE, "le");
 4285     greater(0xF, "g");
 4286     overflow(0x0, "o");
 4287     no_overflow(0x1, "no");
 4288   %}
 4289 %}
 4290 
 4291 // Comparison Code, unsigned compare.  Used by FP also, with
 4292 // C2 (unordered) turned into GT or LT already.  The other bits
 4293 // C0 and C3 are turned into Carry & Zero flags.
 4294 operand cmpOpU()
 4295 %{
 4296   match(Bool);
 4297   // Differs from cmpOp only in the ordering codes, which use the b/ae/be/a forms.
 4298   format %{ "" %}
 4299   interface(COND_INTER) %{
 4300     equal(0x4, "e");
 4301     not_equal(0x5, "ne");
 4302     less(0x2, "b");
 4303     greater_equal(0x3, "ae");
 4304     less_equal(0x6, "be");
 4305     greater(0x7, "a");
 4306     overflow(0x0, "o");
 4307     no_overflow(0x1, "no");
 4308   %}
 4309 %}
 4310 
 4311 
 4312 // Floating comparisons that don't require any fixup for the unordered case.
 4313 // If both inputs of the comparison are the same, ZF is always set so we
 4314 // don't need to use cmpOpUCF2 for eq/ne.
 4315 operand cmpOpUCF() %{
 4316   match(Bool);
 4317   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4318             n->as_Bool()->_test._test == BoolTest::ge ||
 4319             n->as_Bool()->_test._test == BoolTest::le ||
 4320             n->as_Bool()->_test._test == BoolTest::gt ||
 4321             n->in(1)->in(1) == n->in(1)->in(2));
 4322   format %{ "" %}
 4323   interface(COND_INTER) %{
 4324     equal(0xb, "np");      // eq/ne use the parity forms here, unlike cmpOpU
 4325     not_equal(0xa, "p");
 4326     less(0x2, "b");
 4327     greater_equal(0x3, "ae");
 4328     less_equal(0x6, "be");
 4329     greater(0x7, "a");
 4330     overflow(0x0, "o");
 4331     no_overflow(0x1, "no");
 4332   %}
 4333 %}
 4334 
 4335 
 4336 // Floating comparisons that can be fixed up with extra conditional jumps:
 4337 operand cmpOpUCF2() %{ // eq/ne tests whose two compare inputs are different nodes
 4338   match(Bool);
 4339   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 4340              n->as_Bool()->_test._test == BoolTest::eq) &&
 4341             n->in(1)->in(1) != n->in(1)->in(2));
 4342   format %{ "" %}
 4343   interface(COND_INTER) %{
 4344     equal(0x4, "e");
 4345     not_equal(0x5, "ne");
 4346     less(0x2, "b");
 4347     greater_equal(0x3, "ae");
 4348     less_equal(0x6, "be");
 4349     greater(0x7, "a");
 4350     overflow(0x0, "o");
 4351     no_overflow(0x1, "no");
 4352   %}
 4353 %}
 4354 
 4355 //----------OPERAND CLASSES----------------------------------------------------
 4356 // Operand Classes are groups of operands that are used to simplify
 4357 // instruction definitions by not requiring the AD writer to specify separate
 4358 // instructions for every form of operand when the instruction accepts
 4359 // multiple operand types with the same basic encoding and format.  The classic
 4360 // case of this is memory operands.
 4361 // The memory class groups every MEMORY_INTER addressing operand defined above except the stack slots.
 4362 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 4363                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4364                indCompressedOop, indCompressedOopOffset,
 4365                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 4366                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4367                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4368 
 4369 //----------PIPELINE-----------------------------------------------------------
 4370 // Rules which define the behavior of the target architectures pipeline.
 4371 pipeline %{
 4372 
 4373 //----------ATTRIBUTES---------------------------------------------------------
 4374 attributes %{
 4375   variable_size_instructions;        // Variable size instructions
 4376   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4377   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 4378   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4379   instruction_fetch_units = 1;       // of 16 bytes
 4380 
 4381   // List of nop instructions
 4382   nops( MachNop );
 4383 %}
 4384 
 4385 //----------RESOURCES----------------------------------------------------------
 4386 // Resources are the functional units available to the machine
 4387 // (DECODE, MEM and ALU below are each the union of their three individual units).
 4388 // Generic P2/P3 pipeline
 4389 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4390 // 3 instructions decoded per cycle.
 4391 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4392 // 3 ALU op, only ALU0 handles mul instructions.  NOTE(review): three memory units (MS0-MS2) are declared, not 2 - confirm.
 4393 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4394            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 4395            BR, FPU,
 4396            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 4397 
 4398 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4399 // Pipeline Description specifies the stages in the machine's pipeline
 4400 // (the pipe_class definitions refer to these stage names).
 4401 // Generic P2/P3 pipeline
 4402 pipe_desc(S0, S1, S2, S3, S4, S5); // six stages, S0 through S5
 4403 
 4404 //----------PIPELINE CLASSES---------------------------------------------------
 4405 // Pipeline Classes describe the stages in which input and output are
 4406 // referenced by the hardware pipeline.
 4407 
 4408 // Naming convention: ialu or fpu
 4409 // Then: _reg
 4410 // Then: _reg if there is a 2nd register
 4411 // Then: _long if it's a pair of instructions implementing a long
 4412 // Then: _fat if it requires the big decoder
 4413 //   Or: _mem if it requires the big decoder and a memory unit.
 4414 
 4415 // Integer ALU reg operation: dst is read in S3 and written in S4
 4416 pipe_class ialu_reg(rRegI dst)
 4417 %{
 4418     single_instruction;
 4419     dst    : S4(write);
 4420     dst    : S3(read);
 4421     DECODE : S0;        // any decoder
 4422     ALU    : S3;        // any alu
 4423 %}
 4424 
 4425 // Long ALU reg operation: counted as two instructions; dst is read in S3 and written in S4
 4426 pipe_class ialu_reg_long(rRegL dst)
 4427 %{
 4428     instruction_count(2);
 4429     dst    : S4(write);
 4430     dst    : S3(read);
 4431     DECODE : S0(2);     // any 2 decoders
 4432     ALU    : S3(2);     // any 2 alus
 4433 %}
 4434 
 4435 // Integer ALU reg operation using big decoder (D0) instead of any decoder
 4436 pipe_class ialu_reg_fat(rRegI dst)
 4437 %{
 4438     single_instruction;
 4439     dst    : S4(write);
 4440     dst    : S3(read);
 4441     D0     : S0;        // big decoder only
 4442     ALU    : S3;        // any alu
 4443 %}
 4444 
 4445 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4
 4446 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 4447 %{
 4448     single_instruction;
 4449     dst    : S4(write);
 4450     src    : S3(read);
 4451     DECODE : S0;        // any decoder
 4452     ALU    : S3;        // any alu
 4453 %}
 4454 
 4455 // Integer ALU operation using big decoder; note that src is declared as a memory operand here
 4456 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 4457 %{
 4458     single_instruction;
 4459     dst    : S4(write);
 4460     src    : S3(read);
 4461     D0     : S0;        // big decoder only
 4462     ALU    : S3;        // any alu
 4463 %}
 4464 
 4465 // Integer ALU reg-mem operation: mem is read in S3, dst is written in S5
 4466 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 4467 %{
 4468     single_instruction;
 4469     dst    : S5(write);
 4470     mem    : S3(read);
 4471     D0     : S0;        // big decoder only
 4472     ALU    : S4;        // any alu
 4473     MEM    : S3;        // any mem
 4474 %}
 4475 
 4476 // Integer mem operation (prefetch): reads mem in S3, uses no ALU
 4477 pipe_class ialu_mem(memory mem)
 4478 %{
 4479     single_instruction;
 4480     mem    : S3(read);
 4481     D0     : S0;        // big decoder only
 4482     MEM    : S3;        // any mem
 4483 %}
 4484 
 4485 // Integer Store to Memory: mem operand is read in S3, src register in S5
 4486 pipe_class ialu_mem_reg(memory mem, rRegI src)
 4487 %{
 4488     single_instruction;
 4489     mem    : S3(read);
 4490     src    : S5(read);
 4491     D0     : S0;        // big decoder only
 4492     ALU    : S4;        // any alu
 4493     MEM    : S3;        // any mem
 4494 %}
 4495 
 4496 // // Long Store to Memory
 4497 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 4498 // %{
 4499 //     instruction_count(2);
 4500 //     mem    : S3(read);
 4501 //     src    : S5(read);
 4502 //     D0     : S0(2);          // big decoder only; twice
 4503 //     ALU    : S4(2);     // any 2 alus
 4504 //     MEM    : S3(2);  // Both mems
 4505 // %}
 4506 
 4507 // Integer Store to Memory with no register source (the name suggests an immediate value)
 4508 pipe_class ialu_mem_imm(memory mem)
 4509 %{
 4510     single_instruction;
 4511     mem    : S3(read);
 4512     D0     : S0;        // big decoder only
 4513     ALU    : S4;        // any alu
 4514     MEM    : S3;        // any mem
 4515 %}
 4516 
 4517 // Integer ALU0 reg-reg operation: restricted to the big decoder and to ALU0
 4518 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 4519 %{
 4520     single_instruction;
 4521     dst    : S4(write);
 4522     src    : S3(read);
 4523     D0     : S0;        // Big decoder only
 4524     ALU0   : S3;        // only alu0
 4525 %}
 4526 
 4527 // Integer ALU0 reg-mem operation: mem is read in S3, dst is written in S5
 4528 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 4529 %{
 4530     single_instruction;
 4531     dst    : S5(write);
 4532     mem    : S3(read);
 4533     D0     : S0;        // big decoder only
 4534     ALU0   : S4;        // ALU0 only
 4535     MEM    : S3;        // any mem
 4536 %}
 4537 
 4538 // Integer ALU reg-reg operation that writes the flags register (cr) in S4
 4539 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 4540 %{
 4541     single_instruction;
 4542     cr     : S4(write);
 4543     src1   : S3(read);
 4544     src2   : S3(read);
 4545     DECODE : S0;        // any decoder
 4546     ALU    : S3;        // any alu
 4547 %}
 4548 
 4549 // Integer ALU reg-imm operation that writes the flags register (cr) in S4
 4550 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 4551 %{
 4552     single_instruction;
 4553     cr     : S4(write);
 4554     src1   : S3(read);
 4555     DECODE : S0;        // any decoder
 4556     ALU    : S3;        // any alu
 4557 %}
 4558 
 4559 // Integer ALU reg-mem operation that writes the flags register (cr) in S4
 4560 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 4561 %{
 4562     single_instruction;
 4563     cr     : S4(write);
 4564     src1   : S3(read);
 4565     src2   : S3(read);
 4566     D0     : S0;        // big decoder only
 4567     ALU    : S4;        // any alu
 4568     MEM    : S3;        // any mem
 4569 %}
 4570 
 4571 // Four-instruction sequence reading p, q and y (presumably a compare-less-than expansion, per the name - confirm)
 4572 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 4573 %{
 4574     instruction_count(4);
 4575     y      : S4(read);
 4576     q      : S3(read);
 4577     p      : S3(read);
 4578     DECODE : S0(4);     // any decoder
 4579 %}
 4580 
 4581 // Conditional move reg-reg: src and flags are read in S3, dst is written in S4
 4582 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 4583 %{
 4584     single_instruction;
 4585     dst    : S4(write);
 4586     src    : S3(read);
 4587     cr     : S3(read);
 4588     DECODE : S0;        // any decoder
 4589 %}
 4590 
 4591 // Conditional move reg-mem: memory src and flags are read in S3, dst is written in S4
 4592 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 4593 %{
 4594     single_instruction;
 4595     dst    : S4(write);
 4596     src    : S3(read);
 4597     cr     : S3(read);
 4598     DECODE : S0;        // any decoder
 4599     MEM    : S3;        // any mem
 4600 %}
 4601 
 4602 // Conditional move reg-reg long
 4603 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 4604 %{
 4605     single_instruction; // NOTE(review): single instruction, yet DECODE below carries a count of 2 - confirm intended
 4606     dst    : S4(write);
 4607     src    : S3(read);
 4608     cr     : S3(read);
 4609     DECODE : S0(2);     // any 2 decoders
 4610 %}
 4611 
 4612 // XXX
 4613 // // Conditional move double reg-reg
 4614 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
 4615 // %{
 4616 //     single_instruction;
 4617 //     dst    : S4(write);
 4618 //     src    : S3(read);
 4619 //     cr     : S3(read);
 4620 //     DECODE : S0;     // any decoder
 4621 // %}
 4622 
 4623 // Float reg operation: counted as two instructions; dst is only read (S3)
 4624 pipe_class fpu_reg(regD dst)
 4625 %{
 4626     instruction_count(2);
 4627     dst    : S3(read);
 4628     DECODE : S0(2);     // any 2 decoders
 4629     FPU    : S3;
 4630 %}
 4631 
 4632 // Float reg-reg operation: counted as two instructions; src read in S3, dst written in S4
 4633 pipe_class fpu_reg_reg(regD dst, regD src)
 4634 %{
 4635     instruction_count(2);
 4636     dst    : S4(write);
 4637     src    : S3(read);
 4638     DECODE : S0(2);     // any 2 decoders
 4639     FPU    : S3;
 4640 %}
 4641 
 4642 // Float reg-reg-reg operation: counted as three instructions
 4643 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 4644 %{
 4645     instruction_count(3);
 4646     dst    : S4(write);
 4647     src1   : S3(read);
 4648     src2   : S3(read);
 4649     DECODE : S0(3);     // any 3 decoders
 4650     FPU    : S3(2);
 4651 %}
 4652 
 4653 // Float reg-reg-reg-reg operation: counted as four instructions
 4654 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 4655 %{
 4656     instruction_count(4);
 4657     dst    : S4(write);
 4658     src1   : S3(read);
 4659     src2   : S3(read);
 4660     src3   : S3(read);
 4661     DECODE : S0(4);     // any decoders (count 4)
 4662     FPU    : S3(2);
 4663 %}
 4664 
 4665 // Float reg-mem-reg-reg operation: counted as four instructions, one operand (src1) in memory
 4666 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 4667 %{
 4668     instruction_count(4);
 4669     dst    : S4(write);
 4670     src1   : S3(read);
 4671     src2   : S3(read);
 4672     src3   : S3(read);
 4673     DECODE : S1(3);     // any 3 decoders
 4674     D0     : S0;        // Big decoder only
 4675     FPU    : S3(2);
 4676     MEM    : S3;        // any mem
 4677 %}
 4678 
 4679 // Float reg-mem operation: counted as two instructions; mem read in S3, dst written in S5
 4680 pipe_class fpu_reg_mem(regD dst, memory mem)
 4681 %{
 4682     instruction_count(2);
 4683     dst    : S5(write);
 4684     mem    : S3(read);
 4685     D0     : S0;        // big decoder only
 4686     DECODE : S1;        // any decoder for FPU POP
 4687     FPU    : S4;
 4688     MEM    : S3;        // any mem
 4689 %}
 4690 
 4691 // Float reg-reg-mem operation: counted as three instructions; dst written in S5
 4692 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 4693 %{
 4694     instruction_count(3);
 4695     dst    : S5(write);
 4696     src1   : S3(read);
 4697     mem    : S3(read);
 4698     D0     : S0;        // big decoder only
 4699     DECODE : S1(2);     // any decoder for FPU POP
 4700     FPU    : S4;
 4701     MEM    : S3;        // any mem
 4702 %}
 4703 
 4704 // Float mem-reg operation: counted as two instructions; mem read in S3, src read in S5
 4705 pipe_class fpu_mem_reg(memory mem, regD src)
 4706 %{
 4707     instruction_count(2);
 4708     src    : S5(read);
 4709     mem    : S3(read);
 4710     DECODE : S0;        // any decoder for FPU PUSH
 4711     D0     : S1;        // big decoder only
 4712     FPU    : S4;
 4713     MEM    : S3;        // any mem
 4714 %}
 4715 
 4716 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)  // Float mem-reg-reg operation
 4717 %{
 4718     instruction_count(3);
 4719     src1   : S3(read);
 4720     src2   : S3(read);
 4721     mem    : S3(read);
 4722     DECODE : S0(2);     // any decoder for FPU PUSH
 4723     D0     : S1;        // big decoder only
 4724     FPU    : S4;
 4725     MEM    : S3;        // any mem
 4726 %}
 4727 
 4728 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)  // Float mem-reg-mem operation
 4729 %{
 4730     instruction_count(3);
 4731     src1   : S3(read);
 4732     src2   : S3(read);
 4733     mem    : S4(read);  // read one stage after the two sources
 4734     DECODE : S0;        // any decoder for FPU PUSH
 4735     D0     : S0(2);     // big decoder only
 4736     FPU    : S4;
 4737     MEM    : S3(2);     // any mem
 4738 %}
 4739 
 4740 pipe_class fpu_mem_mem(memory dst, memory src1)  // Float mem-mem operation; declares no FPU resource
 4741 %{
 4742     instruction_count(2);
 4743     src1   : S3(read);
 4744     dst    : S4(read);  // NOTE(review): dst is declared as a read, not a write - confirm intent
 4745     D0     : S0(2);     // big decoder only
 4746     MEM    : S3(2);     // any mem
 4747 %}
 4748 
 4749 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)  // Float mem-mem-mem operation
 4750 %{
 4751     instruction_count(3);
 4752     src1   : S3(read);
 4753     src2   : S3(read);
 4754     dst    : S4(read);  // NOTE(review): dst is declared as a read, not a write - confirm intent
 4755     D0     : S0(3);     // big decoder only
 4756     FPU    : S4;
 4757     MEM    : S3(3);     // any mem
 4758 %}
 4759 
 4760 pipe_class fpu_mem_reg_con(memory mem, regD src1)  // Float mem-reg operation; name suggests a constant operand as well
 4761 %{
 4762     instruction_count(3);
 4763     src1   : S4(read);
 4764     mem    : S4(read);
 4765     DECODE : S0;        // any decoder for FPU PUSH
 4766     D0     : S0(2);     // big decoder only
 4767     FPU    : S4;
 4768     MEM    : S3(2);     // any mem
 4769 %}
 4770 
 4771 // Float load constant: only operand is the destination register, written in S5
 4772 pipe_class fpu_reg_con(regD dst)
 4773 %{
 4774     instruction_count(2);
 4775     dst    : S5(write);
 4776     D0     : S0;        // big decoder only for the load
 4777     DECODE : S1;        // any decoder for FPU POP
 4778     FPU    : S4;
 4779     MEM    : S3;        // any mem
 4780 %}
 4781 
 4782 // Float load constant combined with a register source (src read in S3, dst written in S5)
 4783 pipe_class fpu_reg_reg_con(regD dst, regD src)
 4784 %{
 4785     instruction_count(3);
 4786     dst    : S5(write);
 4787     src    : S3(read);
 4788     D0     : S0;        // big decoder only for the load
 4789     DECODE : S1(2);     // any decoder for FPU POP
 4790     FPU    : S4;
 4791     MEM    : S3;        // any mem
 4792 %}
 4793 
 4794 // Unconditional branch: a single instruction that uses only the BR resource
 4795 pipe_class pipe_jmp(label labl)
 4796 %{
 4797     single_instruction;
 4798     BR   : S3;
 4799 %}
 4800 
 4801 // Conditional branch: a single instruction that reads the flags register and uses BR
 4802 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 4803 %{
 4804     single_instruction;
 4805     cr    : S1(read);   // flags are consumed before the branch unit is used in S3
 4806     BR    : S3;
 4807 %}
 4808 
 4809 // Allocation idiom (compare-and-exchange): one serialized instruction with a fixed latency of 6
 4810 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 4811 %{
 4812     instruction_count(1); force_serialization;
 4813     fixed_latency(6);
 4814     heap_ptr : S3(read);
 4815     DECODE   : S0(3);
 4816     D0       : S2;
 4817     MEM      : S3;
 4818     ALU      : S3(2);
 4819     dst      : S5(write);
 4820     BR       : S5;
 4821 %}
 4822 
 4823 // Generic big/slow expanded idiom: 10 instructions, multiple bundles, serialized, fixed latency 100
 4824 pipe_class pipe_slow()
 4825 %{
 4826     instruction_count(10); multiple_bundles; force_serialization;
 4827     fixed_latency(100);
 4828     D0  : S0(2);
 4829     MEM : S3(2);
 4830 %}
 4831 
 4832 // The real do-nothing guy: zero instructions and no resource usage
 4833 pipe_class empty()
 4834 %{
 4835     instruction_count(0);
 4836 %}
 4837 
 4838 // Define the class for the Nop node: MachNop is scheduled with the 'empty' pipe class
 4839 define
 4840 %{
 4841    MachNop = empty;
 4842 %}
 4843 
 4844 %}
 4845 
 4846 //----------INSTRUCTIONS-------------------------------------------------------
 4847 //
 4848 // match      -- States which machine-independent subtree may be replaced
 4849 //               by this instruction.
 4850 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4851 //               selection to identify a minimum cost tree of machine
 4852 //               instructions that matches a tree of machine-independent
 4853 //               instructions.
 4854 // format     -- A string providing the disassembly for this instruction.
 4855 //               The value of an instruction's operand may be inserted
 4856 //               by referring to it with a '$' prefix.
 4857 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4858 //               to within an encode class as $primary, $secondary, and $tertiary
 4859 //               respectively.  The primary opcode is commonly used to
 4860 //               indicate the type of machine instruction, while secondary
 4861 //               and tertiary are often used for prefix options or addressing
 4862 //               modes.
 4863 // ins_encode -- A list of encode classes with parameters. The encode class
 4864 //               name must have been defined in an 'enc_class' specification
 4865 //               in the encode section of the architecture description.
 4866 
 4867 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 4868 // Move Float regF -> vlRegF; placeholder rule whose encoding must never be reached
 4869 instruct MoveF2VL(vlRegF dst, regF src) %{
 4870   match(Set dst src);
 4871   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4872   ins_encode %{
 4873     ShouldNotReachHere();
 4874   %}
 4875   ins_pipe( fpu_reg_reg );
 4876 %}
 4877 
 4878 // Move Float regF -> legRegF; placeholder rule whose encoding must never be reached
 4879 instruct MoveF2LEG(legRegF dst, regF src) %{
 4880   match(Set dst src);
 4881   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4882   ins_encode %{
 4883     ShouldNotReachHere();
 4884   %}
 4885   ins_pipe( fpu_reg_reg );
 4886 %}
 4887 
 4888 // Move Float vlRegF -> regF; placeholder rule whose encoding must never be reached
 4889 instruct MoveVL2F(regF dst, vlRegF src) %{
 4890   match(Set dst src);
 4891   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4892   ins_encode %{
 4893     ShouldNotReachHere();
 4894   %}
 4895   ins_pipe( fpu_reg_reg );
 4896 %}
 4897 
 4898 // Move Float legRegF -> regF; placeholder rule whose encoding must never be reached
 4899 instruct MoveLEG2F(regF dst, legRegF src) %{
 4900   match(Set dst src);
 4901   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4902   ins_encode %{
 4903     ShouldNotReachHere();
 4904   %}
 4905   ins_pipe( fpu_reg_reg );
 4906 %}
 4907 
 4908 // Move Double regD -> vlRegD; placeholder rule whose encoding must never be reached
 4909 instruct MoveD2VL(vlRegD dst, regD src) %{
 4910   match(Set dst src);
 4911   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4912   ins_encode %{
 4913     ShouldNotReachHere();
 4914   %}
 4915   ins_pipe( fpu_reg_reg );
 4916 %}
 4917 
 4918 // Move Double regD -> legRegD; placeholder rule whose encoding must never be reached
 4919 instruct MoveD2LEG(legRegD dst, regD src) %{
 4920   match(Set dst src);
 4921   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4922   ins_encode %{
 4923     ShouldNotReachHere();
 4924   %}
 4925   ins_pipe( fpu_reg_reg );
 4926 %}
 4927 
 4928 // Move Double vlRegD -> regD; placeholder rule whose encoding must never be reached
 4929 instruct MoveVL2D(regD dst, vlRegD src) %{
 4930   match(Set dst src);
 4931   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4932   ins_encode %{
 4933     ShouldNotReachHere();
 4934   %}
 4935   ins_pipe( fpu_reg_reg );
 4936 %}
 4937 
 4938 // Move Double legRegD -> regD; placeholder rule whose encoding must never be reached
 4939 instruct MoveLEG2D(regD dst, legRegD src) %{
 4940   match(Set dst src);
 4941   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4942   ins_encode %{
 4943     ShouldNotReachHere();
 4944   %}
 4945   ins_pipe( fpu_reg_reg );
 4946 %}
 4947 
 4948 //----------Load/Store/Move Instructions---------------------------------------
 4949 //----------Load Instructions--------------------------------------------------
 4950 
 4951 // Load Byte (8 bit signed): matches LoadB and emits movsbl into an int register
 4952 instruct loadB(rRegI dst, memory mem)
 4953 %{
 4954   match(Set dst (LoadB mem));
 4955 
 4956   ins_cost(125);
 4957   format %{ "movsbl  $dst, $mem\t# byte" %}
 4958 
 4959   ins_encode %{
 4960     __ movsbl($dst$$Register, $mem$$Address);
 4961   %}
 4962 
 4963   ins_pipe(ialu_reg_mem);
 4964 %}
 4965 
 4966 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into one movsbq
 4967 instruct loadB2L(rRegL dst, memory mem)
 4968 %{
 4969   match(Set dst (ConvI2L (LoadB mem)));
 4970 
 4971   ins_cost(125);
 4972   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 4973 
 4974   ins_encode %{
 4975     __ movsbq($dst$$Register, $mem$$Address);
 4976   %}
 4977 
 4978   ins_pipe(ialu_reg_mem);
 4979 %}
 4980 
 4981 // Load Unsigned Byte (8 bit UNsigned): matches LoadUB and emits movzbl into an int register
 4982 instruct loadUB(rRegI dst, memory mem)
 4983 %{
 4984   match(Set dst (LoadUB mem));
 4985 
 4986   ins_cost(125);
 4987   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 4988 
 4989   ins_encode %{
 4990     __ movzbl($dst$$Register, $mem$$Address);
 4991   %}
 4992 
 4993   ins_pipe(ialu_reg_mem);
 4994 %}
 4995 
 4996 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds ConvI2L(LoadUB) into one movzbq
 4997 instruct loadUB2L(rRegL dst, memory mem)
 4998 %{
 4999   match(Set dst (ConvI2L (LoadUB mem)));
 5000 
 5001   ins_cost(125);
 5002   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 5003 
 5004   ins_encode %{
 5005     __ movzbq($dst$$Register, $mem$$Address);
 5006   %}
 5007 
 5008   ins_pipe(ialu_reg_mem);
 5009 %}
 5010 
 5011 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register (movzbq followed by andl)
 5012 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 5013   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5014   effect(KILL cr);  // the andl emitted below modifies the flags register
 5015 
 5016   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 5017             "andl    $dst, right_n_bits($mask, 8)" %}
 5018   ins_encode %{
 5019     Register Rdst = $dst$$Register;
 5020     __ movzbq(Rdst, $mem$$Address);
 5021     __ andl(Rdst, $mask$$constant & right_n_bits(8));  // mask is first ANDed with right_n_bits(8)
 5022   %}
 5023   ins_pipe(ialu_reg_mem);
 5024 %}
 5025 
 5026 // Load Short (16 bit signed): matches LoadS and emits movswl into an int register
 5027 instruct loadS(rRegI dst, memory mem)
 5028 %{
 5029   match(Set dst (LoadS mem));
 5030 
 5031   ins_cost(125);
 5032   format %{ "movswl $dst, $mem\t# short" %}
 5033 
 5034   ins_encode %{
 5035     __ movswl($dst$$Register, $mem$$Address);
 5036   %}
 5037 
 5038   ins_pipe(ialu_reg_mem);
 5039 %}
 5040 
 5041 // Load Short (16 bit signed) to Byte (8 bit signed): (LoadS << 24) >> 24 becomes a single movsbl
 5042 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5043   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5044 
 5045   ins_cost(125);
 5046   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 5047   ins_encode %{
 5048     __ movsbl($dst$$Register, $mem$$Address);
 5049   %}
 5050   ins_pipe(ialu_reg_mem);
 5051 %}
 5052 
 5053 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into one movswq
 5054 instruct loadS2L(rRegL dst, memory mem)
 5055 %{
 5056   match(Set dst (ConvI2L (LoadS mem)));
 5057 
 5058   ins_cost(125);
 5059   format %{ "movswq $dst, $mem\t# short -> long" %}
 5060 
 5061   ins_encode %{
 5062     __ movswq($dst$$Register, $mem$$Address);
 5063   %}
 5064 
 5065   ins_pipe(ialu_reg_mem);
 5066 %}
 5067 
 5068 // Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS and emits movzwl into an int register
 5069 instruct loadUS(rRegI dst, memory mem)
 5070 %{
 5071   match(Set dst (LoadUS mem));
 5072 
 5073   ins_cost(125);
 5074   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 5075 
 5076   ins_encode %{
 5077     __ movzwl($dst$$Register, $mem$$Address);
 5078   %}
 5079 
 5080   ins_pipe(ialu_reg_mem);
 5081 %}
 5082 
 5083 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): (LoadUS << 24) >> 24 becomes one movsbl
 5084 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5085   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5086 
 5087   ins_cost(125);
 5088   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 5089   ins_encode %{
 5090     __ movsbl($dst$$Register, $mem$$Address);
 5091   %}
 5092   ins_pipe(ialu_reg_mem);
 5093 %}
 5094 
 5095 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds ConvI2L(LoadUS) into one movzwq
 5096 instruct loadUS2L(rRegL dst, memory mem)
 5097 %{
 5098   match(Set dst (ConvI2L (LoadUS mem)));
 5099 
 5100   ins_cost(125);
 5101   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 5102 
 5103   ins_encode %{
 5104     __ movzwq($dst$$Register, $mem$$Address);
 5105   %}
 5106 
 5107   ins_pipe(ialu_reg_mem);
 5108 %}
 5109 
 5110 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: the AND is folded into a movzbq
 5111 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 5112   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5113 
 5114   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 5115   ins_encode %{
 5116     __ movzbq($dst$$Register, $mem$$Address);  // $mask is not referenced by the encoding
 5117   %}
 5118   ins_pipe(ialu_reg_mem);
 5119 %}
 5120 
 5121 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register (movzwq followed by andl)
 5122 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 5123   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5124   effect(KILL cr);  // the andl emitted below modifies the flags register
 5125 
 5126   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5127             "andl    $dst, right_n_bits($mask, 16)" %}
 5128   ins_encode %{
 5129     Register Rdst = $dst$$Register;
 5130     __ movzwq(Rdst, $mem$$Address);
 5131     __ andl(Rdst, $mask$$constant & right_n_bits(16));  // mask is first ANDed with right_n_bits(16)
 5132   %}
 5133   ins_pipe(ialu_reg_mem);
 5134 %}
 5135 
 5136 // Load Integer: matches LoadI and emits movl into an int register
 5137 instruct loadI(rRegI dst, memory mem)
 5138 %{
 5139   match(Set dst (LoadI mem));
 5140 
 5141   ins_cost(125);
 5142   format %{ "movl    $dst, $mem\t# int" %}
 5143 
 5144   ins_encode %{
 5145     __ movl($dst$$Register, $mem$$Address);
 5146   %}
 5147 
 5148   ins_pipe(ialu_reg_mem);
 5149 %}
 5150 
 5151 // Load Integer (32 bit signed) to Byte (8 bit signed): (LoadI << 24) >> 24 becomes a single movsbl
 5152 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5153   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5154 
 5155   ins_cost(125);
 5156   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 5157   ins_encode %{
 5158     __ movsbl($dst$$Register, $mem$$Address);
 5159   %}
 5160   ins_pipe(ialu_reg_mem);
 5161 %}
 5162 
 5163 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): LoadI & 0xFF becomes a single movzbl
 5164 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5165   match(Set dst (AndI (LoadI mem) mask));
 5166 
 5167   ins_cost(125);
 5168   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 5169   ins_encode %{
 5170     __ movzbl($dst$$Register, $mem$$Address);  // $mask is not referenced by the encoding
 5171   %}
 5172   ins_pipe(ialu_reg_mem);
 5173 %}
 5174 
 5175 // Load Integer (32 bit signed) to Short (16 bit signed): (LoadI << 16) >> 16 becomes a single movswl
 5176 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5177   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5178 
 5179   ins_cost(125);
 5180   format %{ "movswl  $dst, $mem\t# int -> short" %}
 5181   ins_encode %{
 5182     __ movswl($dst$$Register, $mem$$Address);
 5183   %}
 5184   ins_pipe(ialu_reg_mem);
 5185 %}
 5186 
 5187 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): LoadI & 0xFFFF becomes a single movzwl
 5188 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5189   match(Set dst (AndI (LoadI mem) mask));
 5190 
 5191   ins_cost(125);
 5192   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 5193   ins_encode %{
 5194     __ movzwl($dst$$Register, $mem$$Address);  // $mask is not referenced by the encoding
 5195   %}
 5196   ins_pipe(ialu_reg_mem);
 5197 %}
 5198 
 5199 // Load Integer into Long Register: folds ConvI2L(LoadI) into one movslq
 5200 instruct loadI2L(rRegL dst, memory mem)
 5201 %{
 5202   match(Set dst (ConvI2L (LoadI mem)));
 5203 
 5204   ins_cost(125);
 5205   format %{ "movslq  $dst, $mem\t# int -> long" %}
 5206 
 5207   ins_encode %{
 5208     __ movslq($dst$$Register, $mem$$Address);
 5209   %}
 5210 
 5211   ins_pipe(ialu_reg_mem);
 5212 %}
 5213 
 5214 // Load Integer with mask 0xFF into Long Register: the AND is folded into a movzbq
 5215 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 5216   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5217 
 5218   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 5219   ins_encode %{
 5220     __ movzbq($dst$$Register, $mem$$Address);  // $mask is not referenced by the encoding
 5221   %}
 5222   ins_pipe(ialu_reg_mem);
 5223 %}
 5224 
 5225 // Load Integer with mask 0xFFFF into Long Register: the AND is folded into a movzwq
 5226 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 5227   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5228 
 5229   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 5230   ins_encode %{
 5231     __ movzwq($dst$$Register, $mem$$Address);  // $mask is not referenced by the encoding
 5232   %}
 5233   ins_pipe(ialu_reg_mem);
 5234 %}
 5235 
 5236 // Load Integer with a 31-bit mask into Long Register (movl followed by andl with the mask)
 5237 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 5238   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5239   effect(KILL cr);  // the andl emitted below modifies the flags register
 5240 
 5241   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 5242             "andl    $dst, $mask" %}
 5243   ins_encode %{
 5244     Register Rdst = $dst$$Register;
 5245     __ movl(Rdst, $mem$$Address);
 5246     __ andl(Rdst, $mask$$constant);
 5247   %}
 5248   ins_pipe(ialu_reg_mem);
 5249 %}
 5250 
 5251 // Load Unsigned Integer into Long Register: ConvI2L(LoadI) & immL_32bits mask becomes a single movl
 5252 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 5253 %{
 5254   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5255 
 5256   ins_cost(125);
 5257   format %{ "movl    $dst, $mem\t# uint -> long" %}
 5258 
 5259   ins_encode %{
 5260     __ movl($dst$$Register, $mem$$Address);  // $mask is not referenced by the encoding
 5261   %}
 5262 
 5263   ins_pipe(ialu_reg_mem);
 5264 %}
 5265 
 5266 // Load Long: matches LoadL and emits movq into a long register
 5267 instruct loadL(rRegL dst, memory mem)
 5268 %{
 5269   match(Set dst (LoadL mem));
 5270 
 5271   ins_cost(125);
 5272   format %{ "movq    $dst, $mem\t# long" %}
 5273 
 5274   ins_encode %{
 5275     __ movq($dst$$Register, $mem$$Address);
 5276   %}
 5277 
 5278   ins_pipe(ialu_reg_mem); // XXX
 5279 %}
 5280 
 5281 // Load Range: matches LoadRange and emits a 32-bit movl into an int register
 5282 instruct loadRange(rRegI dst, memory mem)
 5283 %{
 5284   match(Set dst (LoadRange mem));
 5285 
 5286   ins_cost(125); // XXX
 5287   format %{ "movl    $dst, $mem\t# range" %}
 5288   ins_encode %{
 5289     __ movl($dst$$Register, $mem$$Address);
 5290   %}
 5291   ins_pipe(ialu_reg_mem);
 5292 %}
 5293 
 5294 // Load Pointer: plain movq, selected only for LoadP nodes whose barrier_data() is 0
 5295 instruct loadP(rRegP dst, memory mem)
 5296 %{
 5297   match(Set dst (LoadP mem));
 5298   predicate(n->as_Load()->barrier_data() == 0);  // loads with non-zero barrier data do not match this rule
 5299 
 5300   ins_cost(125); // XXX
 5301   format %{ "movq    $dst, $mem\t# ptr" %}
 5302   ins_encode %{
 5303     __ movq($dst$$Register, $mem$$Address);
 5304   %}
 5305   ins_pipe(ialu_reg_mem); // XXX
 5306 %}
 5307 
 5308 // Load Compressed Pointer: matches LoadN and emits a 32-bit movl into a narrow-oop register
 5309 instruct loadN(rRegN dst, memory mem)
 5310 %{
 5311    match(Set dst (LoadN mem));
 5312 
 5313    ins_cost(125); // XXX
 5314    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 5315    ins_encode %{
 5316      __ movl($dst$$Register, $mem$$Address);
 5317    %}
 5318    ins_pipe(ialu_reg_mem); // XXX
 5319 %}
 5320 
 5321 
 5322 // Load Klass Pointer: matches LoadKlass and emits a 64-bit movq into a pointer register
 5323 instruct loadKlass(rRegP dst, memory mem)
 5324 %{
 5325   match(Set dst (LoadKlass mem));
 5326 
 5327   ins_cost(125); // XXX
 5328   format %{ "movq    $dst, $mem\t# class" %}
 5329   ins_encode %{
 5330     __ movq($dst$$Register, $mem$$Address);
 5331   %}
 5332   ins_pipe(ialu_reg_mem); // XXX
 5333 %}
 5334 
 5335 // Load narrow Klass Pointer: matches LoadNKlass and emits a 32-bit movl into a narrow register
 5336 instruct loadNKlass(rRegN dst, memory mem)
 5337 %{
 5338   match(Set dst (LoadNKlass mem));
 5339 
 5340   ins_cost(125); // XXX
 5341   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 5342   ins_encode %{
 5343     __ movl($dst$$Register, $mem$$Address);
 5344   %}
 5345   ins_pipe(ialu_reg_mem); // XXX
 5346 %}
 5347 
 5348 // Load Float: matches LoadF and loads into an XMM register through the movflt macro-assembler helper
 5349 instruct loadF(regF dst, memory mem)
 5350 %{
 5351   match(Set dst (LoadF mem));
 5352 
 5353   ins_cost(145); // XXX
 5354   format %{ "movss   $dst, $mem\t# float" %}
 5355   ins_encode %{
 5356     __ movflt($dst$$XMMRegister, $mem$$Address);
 5357   %}
 5358   ins_pipe(pipe_slow); // XXX
 5359 %}
 5360 
 5361 // Load Double, variant selected when UseXmmLoadAndClearUpper is off (printed as movlpd)
 5362 instruct loadD_partial(regD dst, memory mem)
 5363 %{
 5364   predicate(!UseXmmLoadAndClearUpper);
 5365   match(Set dst (LoadD mem));
 5366 
 5367   ins_cost(145); // XXX
 5368   format %{ "movlpd  $dst, $mem\t# double" %}
 5369   ins_encode %{
 5370     __ movdbl($dst$$XMMRegister, $mem$$Address);  // same helper call as loadD
 5371   %}
 5372   ins_pipe(pipe_slow); // XXX
 5373 %}
 5374 
 5375 instruct loadD(regD dst, memory mem)  // Load Double, variant selected when UseXmmLoadAndClearUpper is on
 5376 %{
 5377   predicate(UseXmmLoadAndClearUpper);
 5378   match(Set dst (LoadD mem));
 5379 
 5380   ins_cost(145); // XXX
 5381   format %{ "movsd   $dst, $mem\t# double" %}
 5382   ins_encode %{
 5383     __ movdbl($dst$$XMMRegister, $mem$$Address);  // same helper call as loadD_partial
 5384   %}
 5385   ins_pipe(pipe_slow); // XXX
 5386 %}
 5387 
 5388 
 5389 // Following pseudo code describes the algorithm for max[FD]:
 5390 // Min algorithm is on similar lines
 5391 //  btmp = (b < +0.0) ? a : b
 5392 //  atmp = (b < +0.0) ? b : a
 5393 //  Tmp  = Max_Float(atmp , btmp)
 5394 //  Res  = (atmp == NaN) ? atmp : Tmp
 5395 
 5396 // max = java.lang.Math.max(float a, float b); AVX blend/max/compare sequence for non-reduction nodes
 5397 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 5398   predicate(UseAVX > 0 && !n->is_reduction());
 5399   match(Set dst (MaxF a b));
 5400   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5401   format %{
 5402      "vblendvps        $btmp,$b,$a,$b           \n\t"
 5403      "vblendvps        $atmp,$a,$b,$b           \n\t"
 5404      "vmaxss           $tmp,$atmp,$btmp         \n\t"
 5405      "vcmpps.unordered $btmp,$atmp,$atmp        \n\t"
 5406      "vblendvps        $dst,$tmp,$atmp,$btmp    \n\t"
 5407   %}
 5408   ins_encode %{
 5409     int vector_len = Assembler::AVX_128bit;
 5410     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
 5411     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
 5412     __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5413     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);  // printed as "unordered" above
 5414     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5415  %}
 5416   ins_pipe( pipe_slow );
 5417 %}
 5418 
 5419 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
 5420   predicate(UseAVX > 0 && n->is_reduction());  // float max for reduction nodes; delegates to emit_fp_min_max
 5421   match(Set dst (MaxF a b));
 5422   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5423 
 5424   format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
 5425   ins_encode %{
 5426     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5427                     false /*min*/, true /*single*/);
 5428   %}
 5429   ins_pipe( pipe_slow );
 5430 %}
 5431 
 5432 // max = java.lang.Math.max(double a, double b); AVX blend/max/compare sequence for non-reduction nodes
 5433 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 5434   predicate(UseAVX > 0 && !n->is_reduction());
 5435   match(Set dst (MaxD a b));
 5436   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 5437   format %{
 5438      "vblendvpd        $btmp,$b,$a,$b            \n\t"
 5439      "vblendvpd        $atmp,$a,$b,$b            \n\t"
 5440      "vmaxsd           $tmp,$atmp,$btmp          \n\t"
 5441      "vcmppd.unordered $btmp,$atmp,$atmp         \n\t"
 5442      "vblendvpd        $dst,$tmp,$atmp,$btmp     \n\t"
 5443   %}
 5444   ins_encode %{
 5445     int vector_len = Assembler::AVX_128bit;
 5446     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
 5447     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
 5448     __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5449     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);  // printed as "unordered" above
 5450     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5451   %}
 5452   ins_pipe( pipe_slow );
 5453 %}
 5454 
 5455 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
 5456   predicate(UseAVX > 0 && n->is_reduction());  // double max for reduction nodes; delegates to emit_fp_min_max
 5457   match(Set dst (MaxD a b));
 5458   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5459 
 5460   format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
 5461   ins_encode %{
 5462     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5463                     false /*min*/, false /*single*/);
 5464   %}
 5465   ins_pipe( pipe_slow );
 5466 %}
 5467 
 5468 // min = java.lang.Math.min(float a, float b); AVX blend/min/compare sequence for non-reduction nodes
 5469 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 5470   predicate(UseAVX > 0 && !n->is_reduction());
 5471   match(Set dst (MinF a b));
 5472   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5473   format %{
 5474      "vblendvps        $atmp,$a,$b,$a             \n\t"
 5475      "vblendvps        $btmp,$b,$a,$a             \n\t"
 5476      "vminss           $tmp,$atmp,$btmp           \n\t"
 5477      "vcmpps.unordered $btmp,$atmp,$atmp          \n\t"
 5478      "vblendvps        $dst,$tmp,$atmp,$btmp      \n\t"
 5479   %}
 5480   ins_encode %{
 5481     int vector_len = Assembler::AVX_128bit;
 5482     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);  // min blends on $a, the max rules blend on $b
 5483     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
 5484     __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5485     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);  // printed as "unordered" above
 5486     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5487   %}
 5488   ins_pipe( pipe_slow );
 5489 %}
 5490 
 5491 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
 5492   predicate(UseAVX > 0 && n->is_reduction());  // float min for reduction nodes; delegates to emit_fp_min_max
 5493   match(Set dst (MinF a b));
 5494   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5495 
 5496   format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
 5497   ins_encode %{
 5498     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5499                     true /*min*/, true /*single*/);
 5500   %}
 5501   ins_pipe( pipe_slow );
 5502 %}
 5503 
 5504 // min = java.lang.Math.min(double a, double b); AVX blend/min/compare sequence for non-reduction nodes
 5505 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 5506   predicate(UseAVX > 0 && !n->is_reduction());
 5507   match(Set dst (MinD a b));
 5508   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5509   format %{
 5510      "vblendvpd        $atmp,$a,$b,$a           \n\t"
 5511      "vblendvpd        $btmp,$b,$a,$a           \n\t"
 5512      "vminsd           $tmp,$atmp,$btmp         \n\t"
 5513      "vcmppd.unordered $btmp,$atmp,$atmp        \n\t"
 5514      "vblendvpd        $dst,$tmp,$atmp,$btmp    \n\t"
 5515   %}
 5516   ins_encode %{
 5517     int vector_len = Assembler::AVX_128bit;
 5518     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);  // min blends on $a, the max rules blend on $b
 5519     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
 5520     __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5521     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);  // printed as "unordered" above
 5522     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 5523   %}
 5524   ins_pipe( pipe_slow );
 5525 %}
 5526 
 5527 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
 5528   predicate(UseAVX > 0 && n->is_reduction());  // double min for reduction nodes; delegates to emit_fp_min_max
 5529   match(Set dst (MinD a b));
 5530   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5531 
 5532   format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
 5533   ins_encode %{
 5534     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5535                     true /*min*/, false /*single*/);
 5536   %}
 5537   ins_pipe( pipe_slow );
 5538 %}
 5539 
 5540 // Load Effective Address: materializes an indOffset8 address operand into a pointer register with leaq
 5541 instruct leaP8(rRegP dst, indOffset8 mem)
 5542 %{
 5543   match(Set dst mem);
 5544 
 5545   ins_cost(110); // XXX
 5546   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 5547   ins_encode %{
 5548     __ leaq($dst$$Register, $mem$$Address);
 5549   %}
 5550   ins_pipe(ialu_reg_reg_fat);
 5551 %}
 5552 
 5553 instruct leaP32(rRegP dst, indOffset32 mem)  // leaq of an indOffset32 address operand
 5554 %{
 5555   match(Set dst mem);
 5556 
 5557   ins_cost(110);
 5558   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 5559   ins_encode %{
 5560     __ leaq($dst$$Register, $mem$$Address);
 5561   %}
 5562   ins_pipe(ialu_reg_reg_fat);
 5563 %}
 5564 
 5565 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)  // leaq of an indIndexOffset address operand
 5566 %{
 5567   match(Set dst mem);
 5568 
 5569   ins_cost(110);
 5570   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 5571   ins_encode %{
 5572     __ leaq($dst$$Register, $mem$$Address);
 5573   %}
 5574   ins_pipe(ialu_reg_reg_fat);
 5575 %}
 5576 
 5577 instruct leaPIdxScale(rRegP dst, indIndexScale mem)  // leaq of an indIndexScale address operand
 5578 %{
 5579   match(Set dst mem);
 5580 
 5581   ins_cost(110);
 5582   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 5583   ins_encode %{
 5584     __ leaq($dst$$Register, $mem$$Address);
 5585   %}
 5586   ins_pipe(ialu_reg_reg_fat);
 5587 %}
 5588 
 5589 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)  // leaq of an indPosIndexScale address operand
 5590 %{
 5591   match(Set dst mem);
 5592 
 5593   ins_cost(110);  // format tag below is "posidxscale" so printed assembly differs from leaPIdxScale
 5594   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
 5595   ins_encode %{
 5596     __ leaq($dst$$Register, $mem$$Address);
 5597   %}
 5598   ins_pipe(ialu_reg_reg_fat);
 5599 %}
 5600 
 5601 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)  // leaq of an indIndexScaleOffset address operand
 5602 %{
 5603   match(Set dst mem);
 5604 
 5605   ins_cost(110);
 5606   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 5607   ins_encode %{
 5608     __ leaq($dst$$Register, $mem$$Address);
 5609   %}
 5610   ins_pipe(ialu_reg_reg_fat);
 5611 %}
 5612 
 5613 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)  // leaq of an indPosIndexOffset address operand
 5614 %{
 5615   match(Set dst mem);
 5616 
 5617   ins_cost(110);
 5618   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 5619   ins_encode %{
 5620     __ leaq($dst$$Register, $mem$$Address);
 5621   %}
 5622   ins_pipe(ialu_reg_reg_fat);
 5623 %}
 5624 
 5625 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)  // leaq of an indPosIndexScaleOffset address operand
 5626 %{
 5627   match(Set dst mem);
 5628 
 5629   ins_cost(110);
 5630   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 5631   ins_encode %{
 5632     __ leaq($dst$$Register, $mem$$Address);
 5633   %}
 5634   ins_pipe(ialu_reg_reg_fat);
 5635 %}
 5636 
 5637 // Load Effective Address which uses Narrow (32-bits) oop; only with compressed oops and a non-zero shift
 5638 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 5639 %{
 5640   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 5641   match(Set dst mem);
 5642 
 5643   ins_cost(110);
 5644   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 5645   ins_encode %{
 5646     __ leaq($dst$$Register, $mem$$Address);
 5647   %}
 5648   ins_pipe(ialu_reg_reg_fat);
 5649 %}
 5650 
 5651 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)  // leaq of an indOffset8Narrow address operand
 5652 %{
 5653   predicate(CompressedOops::shift() == 0);  // rule applies only when the compressed-oop shift is zero
 5654   match(Set dst mem);
 5655 
 5656   ins_cost(110); // XXX
 5657   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 5658   ins_encode %{
 5659     __ leaq($dst$$Register, $mem$$Address);
 5660   %}
 5661   ins_pipe(ialu_reg_reg_fat);
 5662 %}
 5663 
 5664 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)  // leaq of an indOffset32Narrow address operand
 5665 %{
 5666   predicate(CompressedOops::shift() == 0);  // rule applies only when the compressed-oop shift is zero
 5667   match(Set dst mem);
 5668 
 5669   ins_cost(110);
 5670   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 5671   ins_encode %{
 5672     __ leaq($dst$$Register, $mem$$Address);
 5673   %}
 5674   ins_pipe(ialu_reg_reg_fat);
 5675 %}
 5676 
// leaq for an indIndexOffsetNarrow operand. Selected only when
// CompressedOops::shift() == 0.
instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5689 
// leaq for an indIndexScaleNarrow operand. Selected only when
// CompressedOops::shift() == 0.
instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5702 
// leaq for an indIndexScaleOffsetNarrow operand. Selected only when
// CompressedOops::shift() == 0.
instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5715 
// leaq for an indPosIndexOffsetNarrow operand. Selected only when
// CompressedOops::shift() == 0.
instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5728 
// leaq for an indPosIndexScaleOffsetNarrow operand. Selected only when
// CompressedOops::shift() == 0.
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 5741 
// Load an int constant (immI) into an int register with movl.
// No explicit ins_cost is specified for this rule.
instruct loadConI(rRegI dst, immI src)
%{
  match(Set dst src);

  format %{ "movl    $dst, $src\t# int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
 5752 
// Load int zero (immI_0) by xor-ing dst with itself (cost 50).
// The flags register is declared killed because xorl modifies the flags.
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 5765 
// Load an arbitrary long constant (immL) with mov64 (cost 150, the most
// expensive of the long-constant rules visible here).
instruct loadConL(rRegL dst, immL src)
%{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq    $dst, $src\t# long" %}
  ins_encode %{
    __ mov64($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 5777 
// Load long zero (immL0) with a 32-bit xorl of dst with itself (cost 50).
// On x86-64 a 32-bit register write clears the upper 32 bits, so the full
// 64-bit register ends up zero. xorl modifies the flags, hence KILL cr.
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# long" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg); // XXX
%}
 5790 
// Load a long constant that fits in an unsigned 32-bit value (immUL32) using
// a 32-bit movl (cost 60); the 32-bit write zero-extends into the long register.
instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 5802 
// Load a long constant matching immL32 with movq and a 32-bit immediate
// (cost 70). Presumably the immediate is sign-extended to 64 bits -- confirm
// against the immL32 operand definition.
instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq    $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 5814 
// Load a pointer constant (immP) with mov64, passing the constant's relocation
// type (con->constant_reloc()) and the RELOC_IMM64 format to the assembler.
// No explicit ins_cost is specified for this rule.
instruct loadConP(rRegP dst, immP con) %{
  match(Set dst con);

  format %{ "movq    $dst, $con\t# ptr" %}
  ins_encode %{
    __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
 5824 
// Load the null pointer constant (immP0) with a 32-bit xorl of dst with itself
// (cost 50). The 32-bit write clears the upper half of the register as well.
// xorl modifies the flags, hence KILL cr.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# ptr" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 5837 
// Load a pointer constant matching immP31 (a positive 32-bit value, per the
// format string) with a 32-bit movl (cost 60).
// NOTE(review): a plain movl of an immediate does not modify the flags, so the
// KILL cr effect looks conservative -- confirm before relying on or removing it.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 5850 
// Load a float constant (immF) into an XMM register from the constant table
// using movflt on the constant's table address (cost 125).
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
 5860 
 5861 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 5862   match(Set dst src);
 5863   effect(KILL cr);
 5864   format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
 5865   ins_encode %{
 5866     __ xorq($dst$$Register, $dst$$Register);
 5867   %}
 5868   ins_pipe(ialu_reg);
 5869 %}
 5870 
// Load a non-null compressed oop constant (immN) via set_narrow_oop (cost 125).
// A NULL constant is treated as a bug in this rule: it hits ShouldNotReachHere.
instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
 5886 
// Load a non-null compressed klass constant (immNKlass) via set_narrow_klass
// (cost 125). A NULL constant is treated as a bug: it hits ShouldNotReachHere.
instruct loadConNKlass(rRegN dst, immNKlass src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
 5902 
// Load float 0.0 (immF0) by xor-ing the XMM register with itself using xorps
// (cost 100, cheaper than the constant-table load of a general float).
instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps   $dst, $dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
 5914 
 5915 // Use the same format since predicate() can not be used here.
// Load a double constant (immD) into an XMM register from the constant table
// using movdbl on the constant's table address (cost 125).
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
 5925 
// Load double 0.0 (immD0) by xor-ing the XMM register with itself using xorpd
// (cost 100, cheaper than the constant-table load of a general double).
instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
 5937 
// Load an int from a stack slot into a register (cost 125). Uses the
// opcode-based encoding style: primary opcode 0x8B emitted through the
// REX_reg_mem, OpcP and reg_mem encoding classes (not shown in this section).
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
 5948 
// Load a long from a stack slot into a register (cost 125). Same opcode 0x8B
// as the int variant but with the REX_reg_mem_wide prefix encoding class,
// matching the movq shown in the format string.
instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
 5959 
// Load a pointer from a stack slot into a register (cost 125). Encoded exactly
// like the long stack-slot load: opcode 0x8B with the REX_reg_mem_wide prefix.
instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}
 5970 
// Load a float from a stack slot into an XMM register (cost 125). The slot is
// addressed as rsp plus the operand's displacement.
instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
 5982 
 5983 // Use the same format since predicate() can not be used here.
// Load a double from a stack slot into an XMM register (cost 125). The slot is
// addressed as rsp plus the operand's displacement.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_encode  %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
 5995 
 5996 // Prefetch instructions for allocation.
 5997 // Must be safe to execute with invalid address (cannot fault).
 5998 
// PrefetchAllocation implemented with prefetchw; selected when
// AllocatePrefetchInstr == 3.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
 6010 
// PrefetchAllocation implemented with prefetchnta; selected when
// AllocatePrefetchInstr == 0.
instruct prefetchAllocNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
 6022 
// PrefetchAllocation implemented with prefetcht0; selected when
// AllocatePrefetchInstr == 1.
instruct prefetchAllocT0( memory mem ) %{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
 6034 
// PrefetchAllocation implemented with prefetcht2; selected when
// AllocatePrefetchInstr == 2.
instruct prefetchAllocT2( memory mem ) %{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
 6046 
 6047 //----------Store Instructions-------------------------------------------------
 6048 
 6049 // Store Byte
// Matches StoreB of an int register to memory and emits movb (cost 125).
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6061 
 6062 // Store Char/Short
// Matches StoreC of an int register to memory and emits a 16-bit movw (cost 125).
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw    $mem, $src\t# char/short" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6074 
 6075 // Store Integer
// Matches StoreI of an int register to memory and emits movl (cost 125).
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6087 
 6088 // Store Long
// Matches StoreL of a long register to memory and emits movq (cost 125).
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}
 6100 
 6101 // Store Pointer
// Matches StoreP of a pointer register to memory and emits movq (cost 125).
// Applies only to store nodes whose barrier_data() is zero.
instruct storeP(memory mem, any_RegP src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6114 
// Store a null pointer by writing r12 instead of an immediate (cost 125).
// Valid only when compressed oops are on with a NULL heap base -- per the
// format string r12 is the heap-base register and therefore holds zero -- and
// the store node has no barrier data.
instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6127 
 6128 // Store NULL Pointer, mark word, or other simple pointer constant.
// Matches StoreP of an immP31 pointer constant and emits movq with an
// immediate (cost 150). Applies only to store nodes without barrier data.
instruct storeImmP(memory mem, immP31 src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6141 
 6142 // Store Compressed Pointer
// Matches StoreN of a compressed-pointer register and emits a 32-bit movl
// (cost 125).
instruct storeN(memory mem, rRegN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6154 
// Matches StoreNKlass of a compressed klass pointer register and emits a
// 32-bit movl (cost 125).
instruct storeNKlass(memory mem, rRegN src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6166 
// Store a compressed null by writing the low 32 bits of r12 (cost 125).
// Valid only when CompressedOops::base() is NULL; per the format string r12 is
// the heap-base register and therefore holds zero in that configuration.
instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == NULL);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6179 
// Matches StoreN of a compressed oop constant (cost 150). A NULL constant is
// stored as a literal 32-bit zero; any other constant goes through
// set_narrow_oop on the memory address.
instruct storeImmN(memory mem, immN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == NULL) {
      __ movl($mem$$Address, 0);
    } else {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}
 6196 
// Matches StoreNKlass of a compressed klass constant and emits it through
// set_narrow_klass on the memory address (cost 150). Unlike the compressed oop
// constant store, there is no NULL special case here.
instruct storeImmNKlass(memory mem, immNKlass src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6208 
 6209 // Store Integer Immediate
// Store int zero by writing the low 32 bits of r12 instead of an immediate
// (cost 125). Valid only when compressed oops are on with a NULL heap base, in
// which case (per the format string) r12 holds zero.
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6222 
// Matches StoreI of an int constant and emits movl with an immediate (cost 150).
instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl    $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6234 
 6235 // Store Long Immediate
// Store long zero by writing r12 instead of an immediate (cost 125). Valid
// only when compressed oops are on with a NULL heap base, in which case (per
// the format string) r12 holds zero.
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6248 
// Matches StoreL of a long constant that fits immL32 and emits movq with an
// immediate (cost 150).
instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq    $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6260 
 6261 // Store Short/Char Immediate
// Store a zero short/char by writing the low 16 bits of r12 (cost 125). Valid
// only when compressed oops are on with a NULL heap base, in which case (per
// the format string) r12 holds zero.
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6274 
// Matches StoreC of a 16-bit constant (immI16) and emits movw with an
// immediate (cost 150). Enabled only when the UseStoreImmI16 flag is set.
instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw    $mem, $src\t# short/char" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6287 
 6288 // Store Byte Immediate
 6289 instruct storeImmB0(memory mem, immI_0 zero)
 6290 %{
 6291   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6292   match(Set mem (StoreB mem zero));
 6293 
 6294   ins_cost(125); // XXX
 6295   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 6296   ins_encode %{
 6297     __ movb($mem$$Address, r12);
 6298   %}
 6299   ins_pipe(ialu_mem_reg);
 6300 %}
 6301 
// Matches StoreB of an 8-bit constant (immI8) and emits movb with an
// immediate (cost 150).
instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6313 
 6314 // Store CMS card-mark Immediate
// Card-mark store (StoreCM) of zero done by writing the low byte of r12
// (cost 125). Valid only when compressed oops are on with a NULL heap base, in
// which case (per the format string) r12 holds zero.
instruct storeImmCM0_reg(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreCM mem zero));

  ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6327 
// Card-mark store (StoreCM) of zero using movb with an immediate (cost 150).
// It has no predicate, so it is available in every configuration.
instruct storeImmCM0(memory mem, immI_0 src)
%{
  match(Set mem (StoreCM mem src));

  ins_cost(150); // XXX
  format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6339 
 6340 // Store Float
// Matches StoreF of an XMM float register to memory and emits movflt (cost 95).
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss   $mem, $src\t# float" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6352 
 6353 // Store immediate Float value (it is faster than store from XMM register)
// Store float 0.0 by writing the low 32 bits of r12 through the integer path
// (cost 25). Valid only when compressed oops are on with a NULL heap base, in
// which case (per the format string) r12 holds zero.
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6366 
// Matches StoreF of a float constant and stores it with an integer movl of
// jint_cast(constant) (cost 50), avoiding an XMM register.
// NOTE(review): jint_cast presumably reinterprets the float's bit pattern as a
// 32-bit int -- confirm against its definition.
instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl    $mem, $src\t# float" %}
  ins_encode %{
    __ movl($mem$$Address, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
 6378 
 6379 // Store Double
// Matches StoreD of an XMM double register to memory and emits movdbl (cost 95).
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd   $mem, $src\t# double" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6391 
 6392 // Store immediate double 0.0 (it is faster than store from XMM register)
// Store double 0.0 with an integer movq of the constant (cost 50). Selected
// when compressed oops are off or the heap base is non-NULL, i.e. exactly the
// configurations in which the r12-based storeD0 rule is not available.
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq    $mem, $src\t# double 0." %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 6405 
// Store double 0.0 by writing r12 through the integer path (cost 25). Valid
// only when compressed oops are on with a NULL heap base, in which case (per
// the format string) r12 holds zero.
instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 6418 
// Store an int register to a stack slot (cost 100). Uses the opcode-based
// encoding style: primary opcode 0x89 emitted through the REX_reg_mem, OpcP
// and reg_mem encoding classes (not shown in this section).
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe( ialu_mem_reg );
%}
 6429 
// Store a long register to a stack slot (cost 100). Same opcode 0x89 as the
// int variant but with the REX_reg_mem_wide prefix encoding class, matching
// the movq shown in the format string.
instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
 6440 
// Store a pointer register to a stack slot (cost 100). Encoded exactly like
// the long stack-slot store: opcode 0x89 with the REX_reg_mem_wide prefix.
instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}
 6451 
// Store an XMM float register to a stack slot (cost 95). The slot is
// addressed as rsp plus the operand's displacement.
instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6463 
// Store an XMM double register to a stack slot (cost 95). The slot is
// addressed as rsp plus the operand's displacement.
instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6475 
// Cache line write-back for the CacheWB node (cost 100). Available only when
// VM_Version::supports_data_cache_line_flush() is true. The address must be a
// plain base register: the encoding asserts there is no index register and no
// displacement, then calls cache_wb on the base register with offset 0.
instruct cacheWB(indirect addr)
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    __ cache_wb(Address($addr$$base$$Register, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6490 
// Synchronization emitted for the CacheWBPreSync node: calls
// cache_wbsync(true). Available only when
// VM_Version::supports_data_cache_line_flush() is true.
instruct cacheWBPreSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    __ cache_wbsync(true);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6503 
// Synchronization emitted for the CacheWBPostSync node: calls
// cache_wbsync(false). Available only when
// VM_Version::supports_data_cache_line_flush() is true.
instruct cacheWBPostSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 6516 
 6517 //----------BSWAP Instructions-------------------------------------------------
// ReverseBytesI performed in place on dst with a single bswapl.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl  $dst" %}
  ins_encode %{
    __ bswapl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}
 6527 
// ReverseBytesL performed in place on dst with a single bswapq.
instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq  $dst" %}
  ins_encode %{
    __ bswapq($dst$$Register);
  %}
  ins_pipe( ialu_reg);
%}
 6537 
// ReverseBytesUS in place: bswapl reverses all four bytes, which moves the
// swapped low 16 bits into the upper half; the logical right shift by 16 then
// brings them back down with zero fill. The shift modifies the flags, hence
// KILL cr.
instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "bswapl  $dst\n\t"
            "shrl    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
 6550 
 6551 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 6552   match(Set dst (ReverseBytesS dst));
 6553   effect(KILL cr);
 6554 
 6555   format %{ "bswapl  $dst\n\t"
 6556             "sar     $dst,16\n\t" %}
 6557   ins_encode %{
 6558     __ bswapl($dst$$Register);
 6559     __ sarl($dst$$Register, 16);
 6560   %}
 6561   ins_pipe( ialu_reg );
 6562 %}
 6563 
 6564 //---------- Zeros Count Instructions ------------------------------------------
 6565 
// CountLeadingZerosI on a register using lzcntl. Selected when
// UseCountLeadingZerosInstruction is set; the flags register is killed.
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 6577 
// CountLeadingZerosI with the LoadI folded into lzcntl's memory operand
// (cost 175). Selected when UseCountLeadingZerosInstruction is set.
instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 6589 
// CountLeadingZerosI fallback used when UseCountLeadingZerosInstruction is
// off. bsrl yields the index of the highest set bit, so the result is
// 31 - index, computed as neg followed by add 31. When the notZero branch is
// not taken (zero input), dst is forced to -1 so that the same neg/add
// sequence produces 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
            "jnz     skip\n\t"
            "movl    $dst, -1\n"
      "skip:\n\t"
            "negl    $dst\n\t"
            "addl    $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}
 6614 
// CountLeadingZerosL on a long register using lzcntq; the result goes to an
// int register. Selected when UseCountLeadingZerosInstruction is set.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 6626 
// CountLeadingZerosL with the LoadL folded into lzcntq's memory operand
// (cost 175). Selected when UseCountLeadingZerosInstruction is set.
instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 6638 
// CountLeadingZerosL fallback used when UseCountLeadingZerosInstruction is
// off. bsrq yields the index of the highest set bit, so the result is
// 63 - index, computed as neg followed by add 63. When the notZero branch is
// not taken (zero input), dst is forced to -1 so that the same neg/add
// sequence produces 64.
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
            "jnz     skip\n\t"
            "movl    $dst, -1\n"
      "skip:\n\t"
            "negl    $dst\n\t"
            "addl    $dst, 63" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrq(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
 6663 
// CountTrailingZerosI on a register using tzcntl. Selected when
// UseCountTrailingZerosInstruction is set; the flags register is killed.
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 6675 
// CountTrailingZerosI with the LoadI folded into tzcntl's memory operand
// (cost 175). Selected when UseCountTrailingZerosInstruction is set.
instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 6687 
// CountTrailingZerosI fallback used when UseCountTrailingZerosInstruction is
// off. bsfl yields the index of the lowest set bit, which is the answer for a
// non-zero input. When the notZero branch is not taken (zero input), dst is
// set to BitsPerInt (32, per the format string).
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz     done\n\t"
            "movl    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
 6707 
// CountTrailingZerosL on a long register using tzcntq; the result goes to an
// int register. Selected when UseCountTrailingZerosInstruction is set.
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 6719 
// CountTrailingZerosL with the LoadL folded into tzcntq's memory operand
// (cost 175). Selected when UseCountTrailingZerosInstruction is set.
instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 6731 
// CountTrailingZerosL fallback used when UseCountTrailingZerosInstruction is
// off. bsfq yields the index of the lowest set bit, which is the answer for a
// non-zero input. When the notZero branch is not taken (zero input), dst is
// set to BitsPerLong (64, per the format string).
instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz     done\n\t"
            "movl    $dst, 64\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfq(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerLong);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
 6751 
 6752 //--------------- Reverse Operation Instructions ----------------
 6753 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 6754   predicate(!VM_Version::supports_gfni());
 6755   match(Set dst (ReverseI src));
 6756   effect(TEMP dst, TEMP rtmp, KILL cr);
 6757   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 6758   ins_encode %{
 6759     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 6760   %}
 6761   ins_pipe( ialu_reg );
 6762 %}
 6763 
 6764 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 6765   predicate(VM_Version::supports_gfni());
 6766   match(Set dst (ReverseI src));
 6767   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6768   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6769   ins_encode %{
 6770     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 6771   %}
 6772   ins_pipe( ialu_reg );
 6773 %}
 6774 
 6775 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 6776   predicate(!VM_Version::supports_gfni());
 6777   match(Set dst (ReverseL src));
 6778   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 6779   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 6780   ins_encode %{
 6781     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 6782   %}
 6783   ins_pipe( ialu_reg );
 6784 %}
 6785 
 6786 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 6787   predicate(VM_Version::supports_gfni());
 6788   match(Set dst (ReverseL src));
 6789   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6790   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6791   ins_encode %{
 6792     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 6793   %}
 6794   ins_pipe( ialu_reg );
 6795 %}
 6796 
 6797 //---------- Population Count Instructions -------------------------------------
 6798 
 6799 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 6800   predicate(UsePopCountInstruction);
 6801   match(Set dst (PopCountI src));
 6802   effect(KILL cr);
 6803 
 6804   format %{ "popcnt  $dst, $src" %}
 6805   ins_encode %{
 6806     __ popcntl($dst$$Register, $src$$Register);
 6807   %}
 6808   ins_pipe(ialu_reg);
 6809 %}
 6810 
 6811 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 6812   predicate(UsePopCountInstruction);
 6813   match(Set dst (PopCountI (LoadI mem)));
 6814   effect(KILL cr);
 6815 
 6816   format %{ "popcnt  $dst, $mem" %}
 6817   ins_encode %{
 6818     __ popcntl($dst$$Register, $mem$$Address);
 6819   %}
 6820   ins_pipe(ialu_reg);
 6821 %}
 6822 
 6823 // Note: Long.bitCount(long) returns an int.
 6824 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 6825   predicate(UsePopCountInstruction);
 6826   match(Set dst (PopCountL src));
 6827   effect(KILL cr);
 6828 
 6829   format %{ "popcnt  $dst, $src" %}
 6830   ins_encode %{
 6831     __ popcntq($dst$$Register, $src$$Register);
 6832   %}
 6833   ins_pipe(ialu_reg);
 6834 %}
 6835 
 6836 // Note: Long.bitCount(long) returns an int.
 6837 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 6838   predicate(UsePopCountInstruction);
 6839   match(Set dst (PopCountL (LoadL mem)));
 6840   effect(KILL cr);
 6841 
 6842   format %{ "popcnt  $dst, $mem" %}
 6843   ins_encode %{
 6844     __ popcntq($dst$$Register, $mem$$Address);
 6845   %}
 6846   ins_pipe(ialu_reg);
 6847 %}
 6848 
 6849 
 6850 //----------MemBar Instructions-----------------------------------------------
 6851 // Memory barrier flavors
 6852 
// Acquire barrier: matches both MemBarAcquire and LoadFence.  It has zero
// cost and zero size, and its encoding is empty, so no code is emitted.
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6864 
// Acquire barrier associated with locking (MemBarAcquireLock).  The encoding
// is empty; per the format text, a prior CMPXCHG in FastLock makes an
// explicit barrier unnecessary.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6875 
// Release barrier: matches both MemBarRelease and StoreFence.  It has zero
// cost and zero size, and its encoding is empty, so no code is emitted.
instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6887 
// Release barrier associated with unlocking (MemBarReleaseLock).  The
// encoding is empty; per the format text, a FastUnlock follows, so no
// explicit barrier is emitted.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6898 
// Volatile barrier (MemBarVolatile): emits a StoreLoad membar through the
// macro assembler.  The format text shows it as a locked add on the stack
// slot at rsp.  The flags register is killed.  The cost of 400 is higher
// than the zero-cost unnecessary_membar_volatile rule for the same node.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
 6913 
// Zero-cost, zero-size variant of the MemBarVolatile rule.  It applies only
// when Matcher::post_store_load_barrier(n) holds for the node, in which
// case the encoding is empty and no code is emitted.
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
 6925 
// Store-store barrier: matches both MemBarStoreStore and StoreStoreFence.
// It has zero cost and zero size, and its encoding is empty.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 6936 
 6937 //----------Move Instructions--------------------------------------------------
 6938 
 6939 instruct castX2P(rRegP dst, rRegL src)
 6940 %{
 6941   match(Set dst (CastX2P src));
 6942 
 6943   format %{ "movq    $dst, $src\t# long->ptr" %}
 6944   ins_encode %{
 6945     if ($dst$$reg != $src$$reg) {
 6946       __ movptr($dst$$Register, $src$$Register);
 6947     }
 6948   %}
 6949   ins_pipe(ialu_reg_reg); // XXX
 6950 %}
 6951 
 6952 instruct castN2X(rRegL dst, rRegN src)
 6953 %{
 6954   match(Set dst (CastP2X src));
 6955 
 6956   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6957   ins_encode %{
 6958     if ($dst$$reg != $src$$reg) {
 6959       __ movptr($dst$$Register, $src$$Register);
 6960     }
 6961   %}
 6962   ins_pipe(ialu_reg_reg); // XXX
 6963 %}
 6964 
 6965 instruct castP2X(rRegL dst, rRegP src)
 6966 %{
 6967   match(Set dst (CastP2X src));
 6968 
 6969   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6970   ins_encode %{
 6971     if ($dst$$reg != $src$$reg) {
 6972       __ movptr($dst$$Register, $src$$Register);
 6973     }
 6974   %}
 6975   ins_pipe(ialu_reg_reg); // XXX
 6976 %}
 6977 
 6978 // Convert oop into int for vectors alignment masking
 6979 instruct convP2I(rRegI dst, rRegP src)
 6980 %{
 6981   match(Set dst (ConvL2I (CastP2X src)));
 6982 
 6983   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6984   ins_encode %{
 6985     __ movl($dst$$Register, $src$$Register);
 6986   %}
 6987   ins_pipe(ialu_reg_reg); // XXX
 6988 %}
 6989 
 6990 // Convert compressed oop into int for vectors alignment masking
 6991 // in case of 32bit oops (heap < 4Gb).
 6992 instruct convN2I(rRegI dst, rRegN src)
 6993 %{
 6994   predicate(CompressedOops::shift() == 0);
 6995   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6996 
 6997   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 6998   ins_encode %{
 6999     __ movl($dst$$Register, $src$$Register);
 7000   %}
 7001   ins_pipe(ialu_reg_reg); // XXX
 7002 %}
 7003 
 7004 // Convert oop pointer into compressed form
 7005 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 7006   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 7007   match(Set dst (EncodeP src));
 7008   effect(KILL cr);
 7009   format %{ "encode_heap_oop $dst,$src" %}
 7010   ins_encode %{
 7011     Register s = $src$$Register;
 7012     Register d = $dst$$Register;
 7013     if (s != d) {
 7014       __ movq(d, s);
 7015     }
 7016     __ encode_heap_oop(d);
 7017   %}
 7018   ins_pipe(ialu_reg_long);
 7019 %}
 7020 
 7021 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 7022   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 7023   match(Set dst (EncodeP src));
 7024   effect(KILL cr);
 7025   format %{ "encode_heap_oop_not_null $dst,$src" %}
 7026   ins_encode %{
 7027     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 7028   %}
 7029   ins_pipe(ialu_reg_long);
 7030 %}
 7031 
 7032 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 7033   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 7034             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 7035   match(Set dst (DecodeN src));
 7036   effect(KILL cr);
 7037   format %{ "decode_heap_oop $dst,$src" %}
 7038   ins_encode %{
 7039     Register s = $src$$Register;
 7040     Register d = $dst$$Register;
 7041     if (s != d) {
 7042       __ movq(d, s);
 7043     }
 7044     __ decode_heap_oop(d);
 7045   %}
 7046   ins_pipe(ialu_reg_long);
 7047 %}
 7048 
 7049 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 7050   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 7051             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 7052   match(Set dst (DecodeN src));
 7053   effect(KILL cr);
 7054   format %{ "decode_heap_oop_not_null $dst,$src" %}
 7055   ins_encode %{
 7056     Register s = $src$$Register;
 7057     Register d = $dst$$Register;
 7058     if (s != d) {
 7059       __ decode_heap_oop_not_null(d, s);
 7060     } else {
 7061       __ decode_heap_oop_not_null(d);
 7062     }
 7063   %}
 7064   ins_pipe(ialu_reg_long);
 7065 %}
 7066 
 7067 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 7068   match(Set dst (EncodePKlass src));
 7069   effect(TEMP dst, KILL cr);
 7070   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 7071   ins_encode %{
 7072     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7073   %}
 7074   ins_pipe(ialu_reg_long);
 7075 %}
 7076 
 7077 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 7078   match(Set dst (DecodeNKlass src));
 7079   effect(TEMP dst, KILL cr);
 7080   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 7081   ins_encode %{
 7082     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7083   %}
 7084   ins_pipe(ialu_reg_long);
 7085 %}
 7086 
 7087 //----------Conditional Move---------------------------------------------------
 7088 // Jump
 7089 // dummy instruction for generating temp registers
 7090 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 7091   match(Jump (LShiftL switch_val shift));
 7092   ins_cost(350);
 7093   predicate(false);
 7094   effect(TEMP dest);
 7095 
 7096   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7097             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 7098   ins_encode %{
 7099     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7100     // to do that and the compiler is using that register as one it can allocate.
 7101     // So we build it all by hand.
 7102     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 7103     // ArrayAddress dispatch(table, index);
 7104     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 7105     __ lea($dest$$Register, $constantaddress);
 7106     __ jmp(dispatch);
 7107   %}
 7108   ins_pipe(pipe_jmp);
 7109 %}
 7110 
 7111 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 7112   match(Jump (AddL (LShiftL switch_val shift) offset));
 7113   ins_cost(350);
 7114   effect(TEMP dest);
 7115 
 7116   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7117             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 7118   ins_encode %{
 7119     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7120     // to do that and the compiler is using that register as one it can allocate.
 7121     // So we build it all by hand.
 7122     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7123     // ArrayAddress dispatch(table, index);
 7124     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7125     __ lea($dest$$Register, $constantaddress);
 7126     __ jmp(dispatch);
 7127   %}
 7128   ins_pipe(pipe_jmp);
 7129 %}
 7130 
 7131 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 7132   match(Jump switch_val);
 7133   ins_cost(350);
 7134   effect(TEMP dest);
 7135 
 7136   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7137             "jmp     [$dest + $switch_val]\n\t" %}
 7138   ins_encode %{
 7139     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7140     // to do that and the compiler is using that register as one it can allocate.
 7141     // So we build it all by hand.
 7142     // Address index(noreg, switch_reg, Address::times_1);
 7143     // ArrayAddress dispatch(table, index);
 7144     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 7145     __ lea($dest$$Register, $constantaddress);
 7146     __ jmp(dispatch);
 7147   %}
 7148   ins_pipe(pipe_jmp);
 7149 %}
 7150 
 7151 // Conditional move
 7152 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7153 %{
 7154   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7155   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7156 
 7157   ins_cost(100); // XXX
 7158   format %{ "setbn$cop $dst\t# signed, int" %}
 7159   ins_encode %{
 7160     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7161     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7162   %}
 7163   ins_pipe(ialu_reg);
 7164 %}
 7165 
 7166 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 7167 %{
 7168   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7169 
 7170   ins_cost(200); // XXX
 7171   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7172   ins_encode %{
 7173     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7174   %}
 7175   ins_pipe(pipe_cmov_reg);
 7176 %}
 7177 
 7178 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7179 %{
 7180   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7181   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7182 
 7183   ins_cost(100); // XXX
 7184   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7185   ins_encode %{
 7186     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7187     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7188   %}
 7189   ins_pipe(ialu_reg);
 7190 %}
 7191 
 7192 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 7193   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7194 
 7195   ins_cost(200); // XXX
 7196   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7197   ins_encode %{
 7198     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7199   %}
 7200   ins_pipe(pipe_cmov_reg);
 7201 %}
 7202 
 7203 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7204 %{
 7205   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7206   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7207 
 7208   ins_cost(100); // XXX
 7209   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7210   ins_encode %{
 7211     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7212     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7213   %}
 7214   ins_pipe(ialu_reg);
 7215 %}
 7216 
// Int conditional move for rFlagsRegUCF / cmpOpUCF operands.  It has no
// encoding of its own and expands to cmovI_regU with the same operands.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
 7224 
 7225 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 7226   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7227   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7228 
 7229   ins_cost(200); // XXX
 7230   format %{ "cmovpl  $dst, $src\n\t"
 7231             "cmovnel $dst, $src" %}
 7232   ins_encode %{
 7233     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7234     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7235   %}
 7236   ins_pipe(pipe_cmov_reg);
 7237 %}
 7238 
 7239 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7240 // inputs of the CMove
 7241 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 7242   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7243   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7244 
 7245   ins_cost(200); // XXX
 7246   format %{ "cmovpl  $dst, $src\n\t"
 7247             "cmovnel $dst, $src" %}
 7248   ins_encode %{
 7249     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7250     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7251   %}
 7252   ins_pipe(pipe_cmov_reg);
 7253 %}
 7254 
 7255 // Conditional move
 7256 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 7257   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7258 
 7259   ins_cost(250); // XXX
 7260   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7261   ins_encode %{
 7262     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7263   %}
 7264   ins_pipe(pipe_cmov_mem);
 7265 %}
 7266 
 7267 // Conditional move
 7268 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 7269 %{
 7270   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7271 
 7272   ins_cost(250); // XXX
 7273   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7274   ins_encode %{
 7275     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7276   %}
 7277   ins_pipe(pipe_cmov_mem);
 7278 %}
 7279 
// Int conditional move from memory for rFlagsRegUCF / cmpOpUCF operands.
// It has no encoding of its own and expands to cmovI_memU with the same
// operands.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
 7287 
 7288 // Conditional move
 7289 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 7290 %{
 7291   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7292 
 7293   ins_cost(200); // XXX
 7294   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 7295   ins_encode %{
 7296     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7297   %}
 7298   ins_pipe(pipe_cmov_reg);
 7299 %}
 7300 
 7301 // Conditional move
 7302 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 7303 %{
 7304   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7305 
 7306   ins_cost(200); // XXX
 7307   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 7308   ins_encode %{
 7309     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7310   %}
 7311   ins_pipe(pipe_cmov_reg);
 7312 %}
 7313 
// Compressed-pointer conditional move for rFlagsRegUCF / cmpOpUCF operands.
// It has no encoding of its own and expands to cmovN_regU with the same
// operands.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
 7321 
 7322 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 7323   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7324   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7325 
 7326   ins_cost(200); // XXX
 7327   format %{ "cmovpl  $dst, $src\n\t"
 7328             "cmovnel $dst, $src" %}
 7329   ins_encode %{
 7330     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7331     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7332   %}
 7333   ins_pipe(pipe_cmov_reg);
 7334 %}
 7335 
 7336 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7337 // inputs of the CMove
 7338 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 7339   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7340   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 7341 
 7342   ins_cost(200); // XXX
 7343   format %{ "cmovpl  $dst, $src\n\t"
 7344             "cmovnel $dst, $src" %}
 7345   ins_encode %{
 7346     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7347     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7348   %}
 7349   ins_pipe(pipe_cmov_reg);
 7350 %}
 7351 
 7352 // Conditional move
 7353 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 7354 %{
 7355   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7356 
 7357   ins_cost(200); // XXX
 7358   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 7359   ins_encode %{
 7360     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7361   %}
 7362   ins_pipe(pipe_cmov_reg);  // XXX
 7363 %}
 7364 
 7365 // Conditional move
 7366 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 7367 %{
 7368   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7369 
 7370   ins_cost(200); // XXX
 7371   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 7372   ins_encode %{
 7373     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7374   %}
 7375   ins_pipe(pipe_cmov_reg); // XXX
 7376 %}
 7377 
// Pointer conditional move for rFlagsRegUCF / cmpOpUCF operands.  It has no
// encoding of its own and expands to cmovP_regU with the same operands.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
 7385 
 7386 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 7387   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7388   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7389 
 7390   ins_cost(200); // XXX
 7391   format %{ "cmovpq  $dst, $src\n\t"
 7392             "cmovneq $dst, $src" %}
 7393   ins_encode %{
 7394     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7395     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7396   %}
 7397   ins_pipe(pipe_cmov_reg);
 7398 %}
 7399 
 7400 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7401 // inputs of the CMove
 7402 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 7403   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7404   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 7405 
 7406   ins_cost(200); // XXX
 7407   format %{ "cmovpq  $dst, $src\n\t"
 7408             "cmovneq $dst, $src" %}
 7409   ins_encode %{
 7410     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7411     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7412   %}
 7413   ins_pipe(pipe_cmov_reg);
 7414 %}
 7415 
 7416 // DISABLED: Requires the ADLC to emit a bottom_type call that
 7417 // correctly meets the two pointer arguments; one is an incoming
 7418 // register but the other is a memory operand.  ALSO appears to
 7419 // be buggy with implicit null checks.
 7420 //
 7421 //// Conditional move
 7422 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
 7423 //%{
 7424 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7425 //  ins_cost(250);
 7426 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7427 //  opcode(0x0F,0x40);
 7428 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7429 //  ins_pipe( pipe_cmov_mem );
 7430 //%}
 7431 //
 7432 //// Conditional move
 7433 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
 7434 //%{
 7435 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7436 //  ins_cost(250);
 7437 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7438 //  opcode(0x0F,0x40);
 7439 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7440 //  ins_pipe( pipe_cmov_mem );
 7441 //%}
 7442 
 7443 instruct cmovL_imm_01(rRegL dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7444 %{
 7445   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7446   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7447 
 7448   ins_cost(100); // XXX
 7449   format %{ "setbn$cop $dst\t# signed, long" %}
 7450   ins_encode %{
 7451     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7452     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7453   %}
 7454   ins_pipe(ialu_reg);
 7455 %}
 7456 
 7457 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 7458 %{
 7459   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7460 
 7461   ins_cost(200); // XXX
 7462   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7463   ins_encode %{
 7464     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7465   %}
 7466   ins_pipe(pipe_cmov_reg);  // XXX
 7467 %}
 7468 
 7469 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 7470 %{
 7471   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7472 
 7473   ins_cost(200); // XXX
 7474   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7475   ins_encode %{
 7476     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7477   %}
 7478   ins_pipe(pipe_cmov_mem);  // XXX
 7479 %}
 7480 
 7481 instruct cmovL_imm_01U(rRegL dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7482 %{
 7483   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7484   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7485 
 7486   ins_cost(100); // XXX
 7487   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7488   ins_encode %{
 7489     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7490     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7491   %}
 7492   ins_pipe(ialu_reg);
 7493 %}
 7494 
 7495 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 7496 %{
 7497   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7498 
 7499   ins_cost(200); // XXX
 7500   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7501   ins_encode %{
 7502     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7503   %}
 7504   ins_pipe(pipe_cmov_reg); // XXX
 7505 %}
 7506 
 7507 instruct cmovL_imm_01UCF(rRegL dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7508 %{
 7509   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7510   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7511 
 7512   ins_cost(100); // XXX
 7513   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7514   ins_encode %{
 7515     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7516     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7517   %}
 7518   ins_pipe(ialu_reg);
 7519 %}
 7520 
// Long conditional move for rFlagsRegUCF / cmpOpUCF operands.  It has no
// encoding of its own and expands to cmovL_regU with the same operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
 7528 
 7529 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 7530   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7531   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7532 
 7533   ins_cost(200); // XXX
 7534   format %{ "cmovpq  $dst, $src\n\t"
 7535             "cmovneq $dst, $src" %}
 7536   ins_encode %{
 7537     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7538     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7539   %}
 7540   ins_pipe(pipe_cmov_reg);
 7541 %}
 7542 
 7543 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7544 // inputs of the CMove
 7545 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 7546   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7547   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7548 
 7549   ins_cost(200); // XXX
 7550   format %{ "cmovpq  $dst, $src\n\t"
 7551             "cmovneq $dst, $src" %}
 7552   ins_encode %{
 7553     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7554     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7555   %}
 7556   ins_pipe(pipe_cmov_reg);
 7557 %}
 7558 
 7559 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 7560 %{
 7561   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7562 
 7563   ins_cost(200); // XXX
 7564   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7565   ins_encode %{
 7566     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7567   %}
 7568   ins_pipe(pipe_cmov_mem); // XXX
 7569 %}
 7570 
// Long conditional move from memory for rFlagsRegUCF / cmpOpUCF operands.
// It has no encoding of its own and expands to cmovL_memU with the same
// operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
 7578 
 7579 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 7580 %{
 7581   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7582 
 7583   ins_cost(200); // XXX
 7584   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7585             "movss     $dst, $src\n"
 7586     "skip:" %}
 7587   ins_encode %{
 7588     Label Lskip;
 7589     // Invert sense of branch from sense of CMOV
 7590     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7591     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7592     __ bind(Lskip);
 7593   %}
 7594   ins_pipe(pipe_slow);
 7595 %}
 7596 
 7597 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
 7598 // %{
 7599 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
 7600 
 7601 //   ins_cost(200); // XXX
 7602 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7603 //             "movss     $dst, $src\n"
 7604 //     "skip:" %}
 7605 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
 7606 //   ins_pipe(pipe_slow);
 7607 // %}
 7608 
 7609 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 7610 %{
 7611   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7612 
 7613   ins_cost(200); // XXX
 7614   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 7615             "movss     $dst, $src\n"
 7616     "skip:" %}
 7617   ins_encode %{
 7618     Label Lskip;
 7619     // Invert sense of branch from sense of CMOV
 7620     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7621     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7622     __ bind(Lskip);
 7623   %}
 7624   ins_pipe(pipe_slow);
 7625 %}
 7626 
// Float register conditional move for the cmpOpUCF / rFlagsRegUCF operand
// pair. Has no encoding of its own; it expands to cmovF_regU.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
 7634 
 7635 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 7636 %{
 7637   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7638 
 7639   ins_cost(200); // XXX
 7640   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 7641             "movsd     $dst, $src\n"
 7642     "skip:" %}
 7643   ins_encode %{
 7644     Label Lskip;
 7645     // Invert sense of branch from sense of CMOV
 7646     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7647     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7648     __ bind(Lskip);
 7649   %}
 7650   ins_pipe(pipe_slow);
 7651 %}
 7652 
 7653 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 7654 %{
 7655   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7656 
 7657   ins_cost(200); // XXX
 7658   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 7659             "movsd     $dst, $src\n"
 7660     "skip:" %}
 7661   ins_encode %{
 7662     Label Lskip;
 7663     // Invert sense of branch from sense of CMOV
 7664     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7665     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7666     __ bind(Lskip);
 7667   %}
 7668   ins_pipe(pipe_slow);
 7669 %}
 7670 
// Double register conditional move for the cmpOpUCF / rFlagsRegUCF operand
// pair. Has no encoding of its own; it expands to cmovD_regU.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
 7678 
 7679 //----------Arithmetic Instructions--------------------------------------------
 7680 //----------Addition Instructions----------------------------------------------
 7681 
 7682 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 7683 %{
 7684   match(Set dst (AddI dst src));
 7685   effect(KILL cr);
 7686 
 7687   format %{ "addl    $dst, $src\t# int" %}
 7688   ins_encode %{
 7689     __ addl($dst$$Register, $src$$Register);
 7690   %}
 7691   ins_pipe(ialu_reg_reg);
 7692 %}
 7693 
 7694 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 7695 %{
 7696   match(Set dst (AddI dst src));
 7697   effect(KILL cr);
 7698 
 7699   format %{ "addl    $dst, $src\t# int" %}
 7700   ins_encode %{
 7701     __ addl($dst$$Register, $src$$constant);
 7702   %}
 7703   ins_pipe( ialu_reg );
 7704 %}
 7705 
 7706 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 7707 %{
 7708   match(Set dst (AddI dst (LoadI src)));
 7709   effect(KILL cr);
 7710 
 7711   ins_cost(150); // XXX
 7712   format %{ "addl    $dst, $src\t# int" %}
 7713   ins_encode %{
 7714     __ addl($dst$$Register, $src$$Address);
 7715   %}
 7716   ins_pipe(ialu_reg_mem);
 7717 %}
 7718 
 7719 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 7720 %{
 7721   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7722   effect(KILL cr);
 7723 
 7724   ins_cost(150); // XXX
 7725   format %{ "addl    $dst, $src\t# int" %}
 7726   ins_encode %{
 7727     __ addl($dst$$Address, $src$$Register);
 7728   %}
 7729   ins_pipe(ialu_mem_reg);
 7730 %}
 7731 
 7732 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 7733 %{
 7734   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7735   effect(KILL cr);
 7736 
 7737   ins_cost(125); // XXX
 7738   format %{ "addl    $dst, $src\t# int" %}
 7739   ins_encode %{
 7740     __ addl($dst$$Address, $src$$constant);
 7741   %}
 7742   ins_pipe(ialu_mem_imm);
 7743 %}
 7744 
 7745 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 7746 %{
 7747   predicate(UseIncDec);
 7748   match(Set dst (AddI dst src));
 7749   effect(KILL cr);
 7750 
 7751   format %{ "incl    $dst\t# int" %}
 7752   ins_encode %{
 7753     __ incrementl($dst$$Register);
 7754   %}
 7755   ins_pipe(ialu_reg);
 7756 %}
 7757 
 7758 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 7759 %{
 7760   predicate(UseIncDec);
 7761   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7762   effect(KILL cr);
 7763 
 7764   ins_cost(125); // XXX
 7765   format %{ "incl    $dst\t# int" %}
 7766   ins_encode %{
 7767     __ incrementl($dst$$Address);
 7768   %}
 7769   ins_pipe(ialu_mem_imm);
 7770 %}
 7771 
 7772 // XXX why does that use AddI
 7773 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
 7774 %{
 7775   predicate(UseIncDec);
 7776   match(Set dst (AddI dst src));
 7777   effect(KILL cr);
 7778 
 7779   format %{ "decl    $dst\t# int" %}
 7780   ins_encode %{
 7781     __ decrementl($dst$$Register);
 7782   %}
 7783   ins_pipe(ialu_reg);
 7784 %}
 7785 
 7786 // XXX why does that use AddI
 7787 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
 7788 %{
 7789   predicate(UseIncDec);
 7790   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7791   effect(KILL cr);
 7792 
 7793   ins_cost(125); // XXX
 7794   format %{ "decl    $dst\t# int" %}
 7795   ins_encode %{
 7796     __ decrementl($dst$$Address);
 7797   %}
 7798   ins_pipe(ialu_mem_imm);
 7799 %}
 7800 
 7801 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
 7802 %{
 7803   predicate(VM_Version::supports_fast_2op_lea());
 7804   match(Set dst (AddI (LShiftI index scale) disp));
 7805 
 7806   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
 7807   ins_encode %{
 7808     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7809     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7810   %}
 7811   ins_pipe(ialu_reg_reg);
 7812 %}
 7813 
 7814 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
 7815 %{
 7816   predicate(VM_Version::supports_fast_3op_lea());
 7817   match(Set dst (AddI (AddI base index) disp));
 7818 
 7819   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
 7820   ins_encode %{
 7821     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7822   %}
 7823   ins_pipe(ialu_reg_reg);
 7824 %}
 7825 
 7826 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
 7827 %{
 7828   predicate(VM_Version::supports_fast_2op_lea());
 7829   match(Set dst (AddI base (LShiftI index scale)));
 7830 
 7831   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
 7832   ins_encode %{
 7833     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7834     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
 7835   %}
 7836   ins_pipe(ialu_reg_reg);
 7837 %}
 7838 
 7839 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
 7840 %{
 7841   predicate(VM_Version::supports_fast_3op_lea());
 7842   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
 7843 
 7844   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
 7845   ins_encode %{
 7846     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7847     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 7848   %}
 7849   ins_pipe(ialu_reg_reg);
 7850 %}
 7851 
 7852 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 7853 %{
 7854   match(Set dst (AddL dst src));
 7855   effect(KILL cr);
 7856 
 7857   format %{ "addq    $dst, $src\t# long" %}
 7858   ins_encode %{
 7859     __ addq($dst$$Register, $src$$Register);
 7860   %}
 7861   ins_pipe(ialu_reg_reg);
 7862 %}
 7863 
 7864 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 7865 %{
 7866   match(Set dst (AddL dst src));
 7867   effect(KILL cr);
 7868 
 7869   format %{ "addq    $dst, $src\t# long" %}
 7870   ins_encode %{
 7871     __ addq($dst$$Register, $src$$constant);
 7872   %}
 7873   ins_pipe( ialu_reg );
 7874 %}
 7875 
 7876 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 7877 %{
 7878   match(Set dst (AddL dst (LoadL src)));
 7879   effect(KILL cr);
 7880 
 7881   ins_cost(150); // XXX
 7882   format %{ "addq    $dst, $src\t# long" %}
 7883   ins_encode %{
 7884     __ addq($dst$$Register, $src$$Address);
 7885   %}
 7886   ins_pipe(ialu_reg_mem);
 7887 %}
 7888 
 7889 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 7890 %{
 7891   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7892   effect(KILL cr);
 7893 
 7894   ins_cost(150); // XXX
 7895   format %{ "addq    $dst, $src\t# long" %}
 7896   ins_encode %{
 7897     __ addq($dst$$Address, $src$$Register);
 7898   %}
 7899   ins_pipe(ialu_mem_reg);
 7900 %}
 7901 
 7902 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
 7903 %{
 7904   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7905   effect(KILL cr);
 7906 
 7907   ins_cost(125); // XXX
 7908   format %{ "addq    $dst, $src\t# long" %}
 7909   ins_encode %{
 7910     __ addq($dst$$Address, $src$$constant);
 7911   %}
 7912   ins_pipe(ialu_mem_imm);
 7913 %}
 7914 
 7915 instruct incL_rReg(rRegI dst, immL1 src, rFlagsReg cr)
 7916 %{
 7917   predicate(UseIncDec);
 7918   match(Set dst (AddL dst src));
 7919   effect(KILL cr);
 7920 
 7921   format %{ "incq    $dst\t# long" %}
 7922   ins_encode %{
 7923     __ incrementq($dst$$Register);
 7924   %}
 7925   ins_pipe(ialu_reg);
 7926 %}
 7927 
 7928 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
 7929 %{
 7930   predicate(UseIncDec);
 7931   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7932   effect(KILL cr);
 7933 
 7934   ins_cost(125); // XXX
 7935   format %{ "incq    $dst\t# long" %}
 7936   ins_encode %{
 7937     __ incrementq($dst$$Address);
 7938   %}
 7939   ins_pipe(ialu_mem_imm);
 7940 %}
 7941 
 7942 // XXX why does that use AddL
 7943 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
 7944 %{
 7945   predicate(UseIncDec);
 7946   match(Set dst (AddL dst src));
 7947   effect(KILL cr);
 7948 
 7949   format %{ "decq    $dst\t# long" %}
 7950   ins_encode %{
 7951     __ decrementq($dst$$Register);
 7952   %}
 7953   ins_pipe(ialu_reg);
 7954 %}
 7955 
 7956 // XXX why does that use AddL
 7957 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
 7958 %{
 7959   predicate(UseIncDec);
 7960   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7961   effect(KILL cr);
 7962 
 7963   ins_cost(125); // XXX
 7964   format %{ "decq    $dst\t# long" %}
 7965   ins_encode %{
 7966     __ decrementq($dst$$Address);
 7967   %}
 7968   ins_pipe(ialu_mem_imm);
 7969 %}
 7970 
 7971 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
 7972 %{
 7973   predicate(VM_Version::supports_fast_2op_lea());
 7974   match(Set dst (AddL (LShiftL index scale) disp));
 7975 
 7976   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
 7977   ins_encode %{
 7978     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7979     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7980   %}
 7981   ins_pipe(ialu_reg_reg);
 7982 %}
 7983 
 7984 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
 7985 %{
 7986   predicate(VM_Version::supports_fast_3op_lea());
 7987   match(Set dst (AddL (AddL base index) disp));
 7988 
 7989   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
 7990   ins_encode %{
 7991     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7992   %}
 7993   ins_pipe(ialu_reg_reg);
 7994 %}
 7995 
 7996 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
 7997 %{
 7998   predicate(VM_Version::supports_fast_2op_lea());
 7999   match(Set dst (AddL base (LShiftL index scale)));
 8000 
 8001   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
 8002   ins_encode %{
 8003     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 8004     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
 8005   %}
 8006   ins_pipe(ialu_reg_reg);
 8007 %}
 8008 
 8009 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
 8010 %{
 8011   predicate(VM_Version::supports_fast_3op_lea());
 8012   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
 8013 
 8014   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
 8015   ins_encode %{
 8016     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 8017     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 8018   %}
 8019   ins_pipe(ialu_reg_reg);
 8020 %}
 8021 
 8022 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
 8023 %{
 8024   match(Set dst (AddP dst src));
 8025   effect(KILL cr);
 8026 
 8027   format %{ "addq    $dst, $src\t# ptr" %}
 8028   ins_encode %{
 8029     __ addq($dst$$Register, $src$$Register);
 8030   %}
 8031   ins_pipe(ialu_reg_reg);
 8032 %}
 8033 
 8034 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
 8035 %{
 8036   match(Set dst (AddP dst src));
 8037   effect(KILL cr);
 8038 
 8039   format %{ "addq    $dst, $src\t# ptr" %}
 8040   ins_encode %{
 8041     __ addq($dst$$Register, $src$$constant);
 8042   %}
 8043   ins_pipe( ialu_reg );
 8044 %}
 8045 
 8046 // XXX addP mem ops ????
 8047 
// CheckCastPP on a pointer register. Declared with size(0) and an empty
// encoding, so no machine code is emitted for it.
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
 8057 
// CastPP on a pointer register. Declared with size(0) and an empty
// encoding, so no machine code is emitted for it.
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
 8067 
// CastII on an int register. Declared with size(0), zero cost and an empty
// encoding, so no machine code is emitted for it.
instruct castII(rRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
 8078 
// CastLL on a long register. Declared with size(0), zero cost and an empty
// encoding, so no machine code is emitted for it.
instruct castLL(rRegL dst)
%{
  match(Set dst (CastLL dst));

  size(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
 8089 
// CastFF on a float register. Declared with size(0), zero cost and an empty
// encoding, so no machine code is emitted for it.
instruct castFF(regF dst)
%{
  match(Set dst (CastFF dst));

  size(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
 8100 
// CastDD on a double register. Declared with size(0), zero cost and an empty
// encoding, so no machine code is emitted for it.
instruct castDD(regD dst)
%{
  match(Set dst (CastDD dst));

  size(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
 8111 
 8112 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 8113 instruct compareAndSwapP(rRegI res,
 8114                          memory mem_ptr,
 8115                          rax_RegP oldval, rRegP newval,
 8116                          rFlagsReg cr)
 8117 %{
 8118   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8119   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 8120   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 8121   effect(KILL cr, KILL oldval);
 8122 
 8123   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8124             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8125             "sete    $res\n\t"
 8126             "movzbl  $res, $res" %}
 8127   ins_encode %{
 8128     __ lock();
 8129     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8130     __ sete($res$$Register);
 8131     __ movzbl($res$$Register, $res$$Register);
 8132   %}
 8133   ins_pipe( pipe_cmpxchg );
 8134 %}
 8135 
 8136 instruct compareAndSwapL(rRegI res,
 8137                          memory mem_ptr,
 8138                          rax_RegL oldval, rRegL newval,
 8139                          rFlagsReg cr)
 8140 %{
 8141   predicate(VM_Version::supports_cx8());
 8142   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 8143   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 8144   effect(KILL cr, KILL oldval);
 8145 
 8146   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8147             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8148             "sete    $res\n\t"
 8149             "movzbl  $res, $res" %}
 8150   ins_encode %{
 8151     __ lock();
 8152     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8153     __ sete($res$$Register);
 8154     __ movzbl($res$$Register, $res$$Register);
 8155   %}
 8156   ins_pipe( pipe_cmpxchg );
 8157 %}
 8158 
 8159 instruct compareAndSwapI(rRegI res,
 8160                          memory mem_ptr,
 8161                          rax_RegI oldval, rRegI newval,
 8162                          rFlagsReg cr)
 8163 %{
 8164   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 8165   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 8166   effect(KILL cr, KILL oldval);
 8167 
 8168   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8169             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8170             "sete    $res\n\t"
 8171             "movzbl  $res, $res" %}
 8172   ins_encode %{
 8173     __ lock();
 8174     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8175     __ sete($res$$Register);
 8176     __ movzbl($res$$Register, $res$$Register);
 8177   %}
 8178   ins_pipe( pipe_cmpxchg );
 8179 %}
 8180 
 8181 instruct compareAndSwapB(rRegI res,
 8182                          memory mem_ptr,
 8183                          rax_RegI oldval, rRegI newval,
 8184                          rFlagsReg cr)
 8185 %{
 8186   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 8187   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 8188   effect(KILL cr, KILL oldval);
 8189 
 8190   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8191             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8192             "sete    $res\n\t"
 8193             "movzbl  $res, $res" %}
 8194   ins_encode %{
 8195     __ lock();
 8196     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 8197     __ sete($res$$Register);
 8198     __ movzbl($res$$Register, $res$$Register);
 8199   %}
 8200   ins_pipe( pipe_cmpxchg );
 8201 %}
 8202 
 8203 instruct compareAndSwapS(rRegI res,
 8204                          memory mem_ptr,
 8205                          rax_RegI oldval, rRegI newval,
 8206                          rFlagsReg cr)
 8207 %{
 8208   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 8209   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 8210   effect(KILL cr, KILL oldval);
 8211 
 8212   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8213             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8214             "sete    $res\n\t"
 8215             "movzbl  $res, $res" %}
 8216   ins_encode %{
 8217     __ lock();
 8218     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 8219     __ sete($res$$Register);
 8220     __ movzbl($res$$Register, $res$$Register);
 8221   %}
 8222   ins_pipe( pipe_cmpxchg );
 8223 %}
 8224 
 8225 instruct compareAndSwapN(rRegI res,
 8226                           memory mem_ptr,
 8227                           rax_RegN oldval, rRegN newval,
 8228                           rFlagsReg cr) %{
 8229   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
 8230   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
 8231   effect(KILL cr, KILL oldval);
 8232 
 8233   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8234             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8235             "sete    $res\n\t"
 8236             "movzbl  $res, $res" %}
 8237   ins_encode %{
 8238     __ lock();
 8239     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8240     __ sete($res$$Register);
 8241     __ movzbl($res$$Register, $res$$Register);
 8242   %}
 8243   ins_pipe( pipe_cmpxchg );
 8244 %}
 8245 
 8246 instruct compareAndExchangeB(
 8247                          memory mem_ptr,
 8248                          rax_RegI oldval, rRegI newval,
 8249                          rFlagsReg cr)
 8250 %{
 8251   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 8252   effect(KILL cr);
 8253 
 8254   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8255             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8256   ins_encode %{
 8257     __ lock();
 8258     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 8259   %}
 8260   ins_pipe( pipe_cmpxchg );
 8261 %}
 8262 
 8263 instruct compareAndExchangeS(
 8264                          memory mem_ptr,
 8265                          rax_RegI oldval, rRegI newval,
 8266                          rFlagsReg cr)
 8267 %{
 8268   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 8269   effect(KILL cr);
 8270 
 8271   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8272             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8273   ins_encode %{
 8274     __ lock();
 8275     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 8276   %}
 8277   ins_pipe( pipe_cmpxchg );
 8278 %}
 8279 
 8280 instruct compareAndExchangeI(
 8281                          memory mem_ptr,
 8282                          rax_RegI oldval, rRegI newval,
 8283                          rFlagsReg cr)
 8284 %{
 8285   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 8286   effect(KILL cr);
 8287 
 8288   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8289             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8290   ins_encode %{
 8291     __ lock();
 8292     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8293   %}
 8294   ins_pipe( pipe_cmpxchg );
 8295 %}
 8296 
 8297 instruct compareAndExchangeL(
 8298                          memory mem_ptr,
 8299                          rax_RegL oldval, rRegL newval,
 8300                          rFlagsReg cr)
 8301 %{
 8302   predicate(VM_Version::supports_cx8());
 8303   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 8304   effect(KILL cr);
 8305 
 8306   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8307             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8308   ins_encode %{
 8309     __ lock();
 8310     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8311   %}
 8312   ins_pipe( pipe_cmpxchg );
 8313 %}
 8314 
 8315 instruct compareAndExchangeN(
 8316                           memory mem_ptr,
 8317                           rax_RegN oldval, rRegN newval,
 8318                           rFlagsReg cr) %{
 8319   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
 8320   effect(KILL cr);
 8321 
 8322   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8323             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8324   ins_encode %{
 8325     __ lock();
 8326     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8327   %}
 8328   ins_pipe( pipe_cmpxchg );
 8329 %}
 8330 
 8331 instruct compareAndExchangeP(
 8332                          memory mem_ptr,
 8333                          rax_RegP oldval, rRegP newval,
 8334                          rFlagsReg cr)
 8335 %{
 8336   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8337   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 8338   effect(KILL cr);
 8339 
 8340   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8341             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8342   ins_encode %{
 8343     __ lock();
 8344     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8345   %}
 8346   ins_pipe( pipe_cmpxchg );
 8347 %}
 8348 
 8349 instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 8350   predicate(n->as_LoadStore()->result_not_used());
 8351   match(Set dummy (GetAndAddB mem add));
 8352   effect(KILL cr);
 8353   format %{ "ADDB  [$mem],$add" %}
 8354   ins_encode %{
 8355     __ lock();
 8356     __ addb($mem$$Address, $add$$constant);
 8357   %}
 8358   ins_pipe( pipe_cmpxchg );
 8359 %}
 8360 
 8361 instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{
 8362   match(Set newval (GetAndAddB mem newval));
 8363   effect(KILL cr);
 8364   format %{ "XADDB  [$mem],$newval" %}
 8365   ins_encode %{
 8366     __ lock();
 8367     __ xaddb($mem$$Address, $newval$$Register);
 8368   %}
 8369   ins_pipe( pipe_cmpxchg );
 8370 %}
 8371 
 8372 instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 8373   predicate(n->as_LoadStore()->result_not_used());
 8374   match(Set dummy (GetAndAddS mem add));
 8375   effect(KILL cr);
 8376   format %{ "ADDW  [$mem],$add" %}
 8377   ins_encode %{
 8378     __ lock();
 8379     __ addw($mem$$Address, $add$$constant);
 8380   %}
 8381   ins_pipe( pipe_cmpxchg );
 8382 %}
 8383 
 8384 instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{
 8385   match(Set newval (GetAndAddS mem newval));
 8386   effect(KILL cr);
 8387   format %{ "XADDW  [$mem],$newval" %}
 8388   ins_encode %{
 8389     __ lock();
 8390     __ xaddw($mem$$Address, $newval$$Register);
 8391   %}
 8392   ins_pipe( pipe_cmpxchg );
 8393 %}
 8394 
 8395 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 8396   predicate(n->as_LoadStore()->result_not_used());
 8397   match(Set dummy (GetAndAddI mem add));
 8398   effect(KILL cr);
 8399   format %{ "ADDL  [$mem],$add" %}
 8400   ins_encode %{
 8401     __ lock();
 8402     __ addl($mem$$Address, $add$$constant);
 8403   %}
 8404   ins_pipe( pipe_cmpxchg );
 8405 %}
 8406 
 8407 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
 8408   match(Set newval (GetAndAddI mem newval));
 8409   effect(KILL cr);
 8410   format %{ "XADDL  [$mem],$newval" %}
 8411   ins_encode %{
 8412     __ lock();
 8413     __ xaddl($mem$$Address, $newval$$Register);
 8414   %}
 8415   ins_pipe( pipe_cmpxchg );
 8416 %}
 8417 
 8418 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
 8419   predicate(n->as_LoadStore()->result_not_used());
 8420   match(Set dummy (GetAndAddL mem add));
 8421   effect(KILL cr);
 8422   format %{ "ADDQ  [$mem],$add" %}
 8423   ins_encode %{
 8424     __ lock();
 8425     __ addq($mem$$Address, $add$$constant);
 8426   %}
 8427   ins_pipe( pipe_cmpxchg );
 8428 %}
 8429 
 8430 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
 8431   match(Set newval (GetAndAddL mem newval));
 8432   effect(KILL cr);
 8433   format %{ "XADDQ  [$mem],$newval" %}
 8434   ins_encode %{
 8435     __ lock();
 8436     __ xaddq($mem$$Address, $newval$$Register);
 8437   %}
 8438   ins_pipe( pipe_cmpxchg );
 8439 %}
 8440 
 8441 instruct xchgB( memory mem, rRegI newval) %{
 8442   match(Set newval (GetAndSetB mem newval));
 8443   format %{ "XCHGB  $newval,[$mem]" %}
 8444   ins_encode %{
 8445     __ xchgb($newval$$Register, $mem$$Address);
 8446   %}
 8447   ins_pipe( pipe_cmpxchg );
 8448 %}
 8449 
 8450 instruct xchgS( memory mem, rRegI newval) %{
 8451   match(Set newval (GetAndSetS mem newval));
 8452   format %{ "XCHGW  $newval,[$mem]" %}
 8453   ins_encode %{
 8454     __ xchgw($newval$$Register, $mem$$Address);
 8455   %}
 8456   ins_pipe( pipe_cmpxchg );
 8457 %}
 8458 
 8459 instruct xchgI( memory mem, rRegI newval) %{
 8460   match(Set newval (GetAndSetI mem newval));
 8461   format %{ "XCHGL  $newval,[$mem]" %}
 8462   ins_encode %{
 8463     __ xchgl($newval$$Register, $mem$$Address);
 8464   %}
 8465   ins_pipe( pipe_cmpxchg );
 8466 %}
 8467 
 8468 instruct xchgL( memory mem, rRegL newval) %{
 8469   match(Set newval (GetAndSetL mem newval));
 8470   format %{ "XCHGL  $newval,[$mem]" %}
 8471   ins_encode %{
 8472     __ xchgq($newval$$Register, $mem$$Address);
 8473   %}
 8474   ins_pipe( pipe_cmpxchg );
 8475 %}
 8476 
 8477 instruct xchgP( memory mem, rRegP newval) %{
 8478   match(Set newval (GetAndSetP mem newval));
 8479   predicate(n->as_LoadStore()->barrier_data() == 0);
 8480   format %{ "XCHGQ  $newval,[$mem]" %}
 8481   ins_encode %{
 8482     __ xchgq($newval$$Register, $mem$$Address);
 8483   %}
 8484   ins_pipe( pipe_cmpxchg );
 8485 %}
 8486 
 8487 instruct xchgN( memory mem, rRegN newval) %{
 8488   match(Set newval (GetAndSetN mem newval));
 8489   format %{ "XCHGL  $newval,$mem]" %}
 8490   ins_encode %{
 8491     __ xchgl($newval$$Register, $mem$$Address);
 8492   %}
 8493   ins_pipe( pipe_cmpxchg );
 8494 %}
 8495 
 8496 //----------Abs Instructions-------------------------------------------
 8497 
 8498 // Integer Absolute Instructions
 8499 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, rFlagsReg cr)
 8500 %{
 8501   match(Set dst (AbsI src));
 8502   effect(TEMP dst, TEMP tmp, KILL cr);
 8503   format %{ "movl $tmp, $src\n\t"
 8504             "sarl $tmp, 31\n\t"
 8505             "movl $dst, $src\n\t"
 8506             "xorl $dst, $tmp\n\t"
 8507             "subl $dst, $tmp\n"
 8508           %}
 8509   ins_encode %{
 8510     __ movl($tmp$$Register, $src$$Register);
 8511     __ sarl($tmp$$Register, 31);
 8512     __ movl($dst$$Register, $src$$Register);
 8513     __ xorl($dst$$Register, $tmp$$Register);
 8514     __ subl($dst$$Register, $tmp$$Register);
 8515   %}
 8516 
 8517   ins_pipe(ialu_reg_reg);
 8518 %}
 8519 
 8520 // Long Absolute Instructions
 8521 instruct absL_rReg(rRegL dst, rRegL src, rRegL tmp, rFlagsReg cr)
 8522 %{
 8523   match(Set dst (AbsL src));
 8524   effect(TEMP dst, TEMP tmp, KILL cr);
 8525   format %{ "movq $tmp, $src\n\t"
 8526             "sarq $tmp, 63\n\t"
 8527             "movq $dst, $src\n\t"
 8528             "xorq $dst, $tmp\n\t"
 8529             "subq $dst, $tmp\n"
 8530           %}
 8531   ins_encode %{
 8532     __ movq($tmp$$Register, $src$$Register);
 8533     __ sarq($tmp$$Register, 63);
 8534     __ movq($dst$$Register, $src$$Register);
 8535     __ xorq($dst$$Register, $tmp$$Register);
 8536     __ subq($dst$$Register, $tmp$$Register);
 8537   %}
 8538 
 8539   ins_pipe(ialu_reg_reg);
 8540 %}
 8541 
 8542 //----------Subtraction Instructions-------------------------------------------
 8543 
 8544 // Integer Subtraction Instructions
 8545 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8546 %{
 8547   match(Set dst (SubI dst src));
 8548   effect(KILL cr);
 8549 
 8550   format %{ "subl    $dst, $src\t# int" %}
 8551   ins_encode %{
 8552     __ subl($dst$$Register, $src$$Register);
 8553   %}
 8554   ins_pipe(ialu_reg_reg);
 8555 %}
 8556 
 8557 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 8558 %{
 8559   match(Set dst (SubI dst (LoadI src)));
 8560   effect(KILL cr);
 8561 
 8562   ins_cost(150);
 8563   format %{ "subl    $dst, $src\t# int" %}
 8564   ins_encode %{
 8565     __ subl($dst$$Register, $src$$Address);
 8566   %}
 8567   ins_pipe(ialu_reg_mem);
 8568 %}
 8569 
 8570 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 8571 %{
 8572   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 8573   effect(KILL cr);
 8574 
 8575   ins_cost(150);
 8576   format %{ "subl    $dst, $src\t# int" %}
 8577   ins_encode %{
 8578     __ subl($dst$$Address, $src$$Register);
 8579   %}
 8580   ins_pipe(ialu_mem_reg);
 8581 %}
 8582 
 8583 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8584 %{
 8585   match(Set dst (SubL dst src));
 8586   effect(KILL cr);
 8587 
 8588   format %{ "subq    $dst, $src\t# long" %}
 8589   ins_encode %{
 8590     __ subq($dst$$Register, $src$$Register);
 8591   %}
 8592   ins_pipe(ialu_reg_reg);
 8593 %}
 8594 
 8595 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 8596 %{
 8597   match(Set dst (SubL dst (LoadL src)));
 8598   effect(KILL cr);
 8599 
 8600   ins_cost(150);
 8601   format %{ "subq    $dst, $src\t# long" %}
 8602   ins_encode %{
 8603     __ subq($dst$$Register, $src$$Address);
 8604   %}
 8605   ins_pipe(ialu_reg_mem);
 8606 %}
 8607 
 8608 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 8609 %{
 8610   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
 8611   effect(KILL cr);
 8612 
 8613   ins_cost(150);
 8614   format %{ "subq    $dst, $src\t# long" %}
 8615   ins_encode %{
 8616     __ subq($dst$$Address, $src$$Register);
 8617   %}
 8618   ins_pipe(ialu_mem_reg);
 8619 %}
 8620 
 8621 // Subtract from a pointer
 8622 // XXX hmpf???
 8623 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
 8624 %{
 8625   match(Set dst (AddP dst (SubI zero src)));
 8626   effect(KILL cr);
 8627 
 8628   format %{ "subq    $dst, $src\t# ptr - int" %}
 8629   opcode(0x2B);
 8630   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
 8631   ins_pipe(ialu_reg_reg);
 8632 %}
 8633 
 8634 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
 8635 %{
 8636   match(Set dst (SubI zero dst));
 8637   effect(KILL cr);
 8638 
 8639   format %{ "negl    $dst\t# int" %}
 8640   ins_encode %{
 8641     __ negl($dst$$Register);
 8642   %}
 8643   ins_pipe(ialu_reg);
 8644 %}
 8645 
 8646 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
 8647 %{
 8648   match(Set dst (NegI dst));
 8649   effect(KILL cr);
 8650 
 8651   format %{ "negl    $dst\t# int" %}
 8652   ins_encode %{
 8653     __ negl($dst$$Register);
 8654   %}
 8655   ins_pipe(ialu_reg);
 8656 %}
 8657 
 8658 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
 8659 %{
 8660   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
 8661   effect(KILL cr);
 8662 
 8663   format %{ "negl    $dst\t# int" %}
 8664   ins_encode %{
 8665     __ negl($dst$$Address);
 8666   %}
 8667   ins_pipe(ialu_reg);
 8668 %}
 8669 
 8670 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
 8671 %{
 8672   match(Set dst (SubL zero dst));
 8673   effect(KILL cr);
 8674 
 8675   format %{ "negq    $dst\t# long" %}
 8676   ins_encode %{
 8677     __ negq($dst$$Register);
 8678   %}
 8679   ins_pipe(ialu_reg);
 8680 %}
 8681 
 8682 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
 8683 %{
 8684   match(Set dst (NegL dst));
 8685   effect(KILL cr);
 8686 
 8687   format %{ "negq    $dst\t# int" %}
 8688   ins_encode %{
 8689     __ negq($dst$$Register);
 8690   %}
 8691   ins_pipe(ialu_reg);
 8692 %}
 8693 
 8694 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
 8695 %{
 8696   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
 8697   effect(KILL cr);
 8698 
 8699   format %{ "negq    $dst\t# long" %}
 8700   ins_encode %{
 8701     __ negq($dst$$Address);
 8702   %}
 8703   ins_pipe(ialu_reg);
 8704 %}
 8705 
 8706 //----------Multiplication/Division Instructions-------------------------------
 8707 // Integer Multiplication Instructions
 8708 // Multiply Register
 8709 
 8710 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8711 %{
 8712   match(Set dst (MulI dst src));
 8713   effect(KILL cr);
 8714 
 8715   ins_cost(300);
 8716   format %{ "imull   $dst, $src\t# int" %}
 8717   ins_encode %{
 8718     __ imull($dst$$Register, $src$$Register);
 8719   %}
 8720   ins_pipe(ialu_reg_reg_alu0);
 8721 %}
 8722 
 8723 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
 8724 %{
 8725   match(Set dst (MulI src imm));
 8726   effect(KILL cr);
 8727 
 8728   ins_cost(300);
 8729   format %{ "imull   $dst, $src, $imm\t# int" %}
 8730   ins_encode %{
 8731     __ imull($dst$$Register, $src$$Register, $imm$$constant);
 8732   %}
 8733   ins_pipe(ialu_reg_reg_alu0);
 8734 %}
 8735 
 8736 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
 8737 %{
 8738   match(Set dst (MulI dst (LoadI src)));
 8739   effect(KILL cr);
 8740 
 8741   ins_cost(350);
 8742   format %{ "imull   $dst, $src\t# int" %}
 8743   ins_encode %{
 8744     __ imull($dst$$Register, $src$$Address);
 8745   %}
 8746   ins_pipe(ialu_reg_mem_alu0);
 8747 %}
 8748 
 8749 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
 8750 %{
 8751   match(Set dst (MulI (LoadI src) imm));
 8752   effect(KILL cr);
 8753 
 8754   ins_cost(300);
 8755   format %{ "imull   $dst, $src, $imm\t# int" %}
 8756   ins_encode %{
 8757     __ imull($dst$$Register, $src$$Address, $imm$$constant);
 8758   %}
 8759   ins_pipe(ialu_reg_mem_alu0);
 8760 %}
 8761 
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
%{
  // MulAddS2I is not encoded directly; it is expanded into three existing
  // rules: dst *= src1, src2 *= src3, then dst += src2.
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  // src2 is overwritten by the second multiply, so it is declared killed
  // along with the flags.
  effect(KILL cr, KILL src2);

  expand %{ mulI_rReg(dst, src1, cr);
           mulI_rReg(src2, src3, cr);
           addI_rReg(dst, src2, cr); %}
%}
 8771 
 8772 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8773 %{
 8774   match(Set dst (MulL dst src));
 8775   effect(KILL cr);
 8776 
 8777   ins_cost(300);
 8778   format %{ "imulq   $dst, $src\t# long" %}
 8779   ins_encode %{
 8780     __ imulq($dst$$Register, $src$$Register);
 8781   %}
 8782   ins_pipe(ialu_reg_reg_alu0);
 8783 %}
 8784 
 8785 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
 8786 %{
 8787   match(Set dst (MulL src imm));
 8788   effect(KILL cr);
 8789 
 8790   ins_cost(300);
 8791   format %{ "imulq   $dst, $src, $imm\t# long" %}
 8792   ins_encode %{
 8793     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
 8794   %}
 8795   ins_pipe(ialu_reg_reg_alu0);
 8796 %}
 8797 
 8798 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
 8799 %{
 8800   match(Set dst (MulL dst (LoadL src)));
 8801   effect(KILL cr);
 8802 
 8803   ins_cost(350);
 8804   format %{ "imulq   $dst, $src\t# long" %}
 8805   ins_encode %{
 8806     __ imulq($dst$$Register, $src$$Address);
 8807   %}
 8808   ins_pipe(ialu_reg_mem_alu0);
 8809 %}
 8810 
 8811 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
 8812 %{
 8813   match(Set dst (MulL (LoadL src) imm));
 8814   effect(KILL cr);
 8815 
 8816   ins_cost(300);
 8817   format %{ "imulq   $dst, $src, $imm\t# long" %}
 8818   ins_encode %{
 8819     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
 8820   %}
 8821   ins_pipe(ialu_reg_mem_alu0);
 8822 %}
 8823 
 8824 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 8825 %{
 8826   match(Set dst (MulHiL src rax));
 8827   effect(USE_KILL rax, KILL cr);
 8828 
 8829   ins_cost(300);
 8830   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
 8831   ins_encode %{
 8832     __ imulq($src$$Register);
 8833   %}
 8834   ins_pipe(ialu_reg_reg_alu0);
 8835 %}
 8836 
 8837 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 8838 %{
 8839   match(Set dst (UMulHiL src rax));
 8840   effect(USE_KILL rax, KILL cr);
 8841 
 8842   ins_cost(300);
 8843   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
 8844   ins_encode %{
 8845     __ mulq($src$$Register);
 8846   %}
 8847   ins_pipe(ialu_reg_reg_alu0);
 8848 %}
 8849 
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  // Signed int division. The dividend and the quotient share rax; the
  // divisor is restricted to a register other than rax/rdx. rdx and the
  // flags are declared killed.
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  // Listing for the sequence produced by cdql_enc (defined elsewhere in this
  // file): when rax == 0x80000000 and the divisor is -1 the idivl is skipped
  // with rdx zeroed; otherwise cdql + idivl execute.
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
 8868 
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  // Signed long division. The dividend and the quotient share rax; the
  // divisor is restricted to a register other than rax/rdx. rdx and the
  // flags are declared killed.
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  // Listing for the sequence produced by cdqq_enc (defined elsewhere in this
  // file): when rax == 0x8000000000000000 and the divisor is -1 the idivq is
  // skipped with rdx zeroed; otherwise cdqq + idivq execute.
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
 8888 
 8889 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
 8890 %{
 8891   match(Set rax (UDivI rax div));
 8892   effect(KILL rdx, KILL cr);
 8893 
 8894   ins_cost(300);
 8895   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
 8896   ins_encode %{
 8897     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
 8898   %}
 8899   ins_pipe(ialu_reg_reg_alu0);
 8900 %}
 8901 
 8902 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
 8903 %{
 8904   match(Set rax (UDivL rax div));
 8905   effect(KILL rdx, KILL cr);
 8906 
 8907   ins_cost(300);
 8908   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
 8909   ins_encode %{
 8910      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
 8911   %}
 8912   ins_pipe(ialu_reg_reg_alu0);
 8913 %}
 8914 
 8915 // Integer DIVMOD with Register, both quotient and mod results
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                             rFlagsReg cr)
%{
  // Matches the combined DivModI node. Unlike divI_rReg / modI_rReg, neither
  // rax nor rdx is declared killed here; only the flags are.
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  // Same listing and cdql_enc encoding as the plain int divide rule: the
  // idivl is skipped (rdx zeroed) when rax == 0x80000000 and the divisor is -1.
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}
 8934 
 8935 // Long DIVMOD with Register, both quotient and mod results
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                             rFlagsReg cr)
%{
  // Matches the combined DivModL node. Unlike divL_rReg / modL_rReg, neither
  // rax nor rdx is declared killed here; only the flags are.
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  // Same listing and cdqq_enc encoding as the plain long divide rule: the
  // idivq is skipped (rdx zeroed) when rax == 0x8000000000000000 and the
  // divisor is -1.
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}
 8955 
 8956 // Unsigned integer DIVMOD with Register, both quotient and mod results
 8957 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
 8958                               no_rax_rdx_RegI div, rFlagsReg cr)
 8959 %{
 8960   match(UDivModI rax div);
 8961   effect(TEMP tmp, KILL cr);
 8962 
 8963   ins_cost(300);
 8964   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
 8965             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
 8966           %}
 8967   ins_encode %{
 8968     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
 8969   %}
 8970   ins_pipe(pipe_slow);
 8971 %}
 8972 
 8973 // Unsigned long DIVMOD with Register, both quotient and mod results
 8974 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
 8975                               no_rax_rdx_RegL div, rFlagsReg cr)
 8976 %{
 8977   match(UDivModL rax div);
 8978   effect(TEMP tmp, KILL cr);
 8979 
 8980   ins_cost(300);
 8981   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
 8982             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
 8983           %}
 8984   ins_encode %{
 8985     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
 8986   %}
 8987   ins_pipe(pipe_slow);
 8988 %}
 8989 
instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  // Signed int remainder. The dividend comes in rax and the result is
  // produced in rdx; rax and the flags are declared killed.
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  // Uses the same cdql_enc encoding as the int divide rule; per the listing,
  // rax == 0x80000000 with divisor -1 skips the idivl and leaves rdx zeroed.
  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
 9008 
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  // Signed long remainder. The dividend comes in rax and the result is
  // produced in rdx; rax and the flags are declared killed.
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  // Uses the same cdqq_enc encoding as the long divide rule; per the listing,
  // rax == 0x8000000000000000 with divisor -1 skips the idivq and leaves rdx
  // zeroed.
  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
 9028 
 9029 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
 9030 %{
 9031   match(Set rdx (UModI rax div));
 9032   effect(KILL rax, KILL cr);
 9033 
 9034   ins_cost(300);
 9035   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
 9036   ins_encode %{
 9037     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
 9038   %}
 9039   ins_pipe(ialu_reg_reg_alu0);
 9040 %}
 9041 
 9042 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
 9043 %{
 9044   match(Set rdx (UModL rax div));
 9045   effect(KILL rax, KILL cr);
 9046 
 9047   ins_cost(300);
 9048   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
 9049   ins_encode %{
 9050     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
 9051   %}
 9052   ins_pipe(ialu_reg_reg_alu0);
 9053 %}
 9054 
 9055 // Integer Shift Instructions
 9056 // Shift Left by one, two, three
 9057 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
 9058 %{
 9059   match(Set dst (LShiftI dst shift));
 9060   effect(KILL cr);
 9061 
 9062   format %{ "sall    $dst, $shift" %}
 9063   ins_encode %{
 9064     __ sall($dst$$Register, $shift$$constant);
 9065   %}
 9066   ins_pipe(ialu_reg);
 9067 %}
 9068 
 9069 // Shift Left by 8-bit immediate
 9070 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9071 %{
 9072   match(Set dst (LShiftI dst shift));
 9073   effect(KILL cr);
 9074 
 9075   format %{ "sall    $dst, $shift" %}
 9076   ins_encode %{
 9077     __ sall($dst$$Register, $shift$$constant);
 9078   %}
 9079   ins_pipe(ialu_reg);
 9080 %}
 9081 
 9082 // Shift Left by 8-bit immediate
 9083 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9084 %{
 9085   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 9086   effect(KILL cr);
 9087 
 9088   format %{ "sall    $dst, $shift" %}
 9089   ins_encode %{
 9090     __ sall($dst$$Address, $shift$$constant);
 9091   %}
 9092   ins_pipe(ialu_mem_imm);
 9093 %}
 9094 
 9095 // Shift Left by variable
 9096 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9097 %{
 9098   predicate(!VM_Version::supports_bmi2());
 9099   match(Set dst (LShiftI dst shift));
 9100   effect(KILL cr);
 9101 
 9102   format %{ "sall    $dst, $shift" %}
 9103   ins_encode %{
 9104     __ sall($dst$$Register);
 9105   %}
 9106   ins_pipe(ialu_reg_reg);
 9107 %}
 9108 
 9109 // Shift Left by variable
 9110 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9111 %{
 9112   predicate(!VM_Version::supports_bmi2());
 9113   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 9114   effect(KILL cr);
 9115 
 9116   format %{ "sall    $dst, $shift" %}
 9117   ins_encode %{
 9118     __ sall($dst$$Address);
 9119   %}
 9120   ins_pipe(ialu_mem_reg);
 9121 %}
 9122 
 9123 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9124 %{
 9125   predicate(VM_Version::supports_bmi2());
 9126   match(Set dst (LShiftI src shift));
 9127 
 9128   format %{ "shlxl   $dst, $src, $shift" %}
 9129   ins_encode %{
 9130     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
 9131   %}
 9132   ins_pipe(ialu_reg_reg);
 9133 %}
 9134 
 9135 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9136 %{
 9137   predicate(VM_Version::supports_bmi2());
 9138   match(Set dst (LShiftI (LoadI src) shift));
 9139   ins_cost(175);
 9140   format %{ "shlxl   $dst, $src, $shift" %}
 9141   ins_encode %{
 9142     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
 9143   %}
 9144   ins_pipe(ialu_reg_mem);
 9145 %}
 9146 
 9147 // Arithmetic Shift Right by 8-bit immediate
 9148 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9149 %{
 9150   match(Set dst (RShiftI dst shift));
 9151   effect(KILL cr);
 9152 
 9153   format %{ "sarl    $dst, $shift" %}
 9154   ins_encode %{
 9155     __ sarl($dst$$Register, $shift$$constant);
 9156   %}
 9157   ins_pipe(ialu_mem_imm);
 9158 %}
 9159 
 9160 // Arithmetic Shift Right by 8-bit immediate
 9161 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9162 %{
 9163   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 9164   effect(KILL cr);
 9165 
 9166   format %{ "sarl    $dst, $shift" %}
 9167   ins_encode %{
 9168     __ sarl($dst$$Address, $shift$$constant);
 9169   %}
 9170   ins_pipe(ialu_mem_imm);
 9171 %}
 9172 
 9173 // Arithmetic Shift Right by variable
 9174 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9175 %{
 9176   predicate(!VM_Version::supports_bmi2());
 9177   match(Set dst (RShiftI dst shift));
 9178   effect(KILL cr);
 9179   format %{ "sarl    $dst, $shift" %}
 9180   ins_encode %{
 9181     __ sarl($dst$$Register);
 9182   %}
 9183   ins_pipe(ialu_reg_reg);
 9184 %}
 9185 
 9186 // Arithmetic Shift Right by variable
 9187 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9188 %{
 9189   predicate(!VM_Version::supports_bmi2());
 9190   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 9191   effect(KILL cr);
 9192 
 9193   format %{ "sarl    $dst, $shift" %}
 9194   ins_encode %{
 9195     __ sarl($dst$$Address);
 9196   %}
 9197   ins_pipe(ialu_mem_reg);
 9198 %}
 9199 
 9200 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9201 %{
 9202   predicate(VM_Version::supports_bmi2());
 9203   match(Set dst (RShiftI src shift));
 9204 
 9205   format %{ "sarxl   $dst, $src, $shift" %}
 9206   ins_encode %{
 9207     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
 9208   %}
 9209   ins_pipe(ialu_reg_reg);
 9210 %}
 9211 
 9212 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9213 %{
 9214   predicate(VM_Version::supports_bmi2());
 9215   match(Set dst (RShiftI (LoadI src) shift));
 9216   ins_cost(175);
 9217   format %{ "sarxl   $dst, $src, $shift" %}
 9218   ins_encode %{
 9219     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
 9220   %}
 9221   ins_pipe(ialu_reg_mem);
 9222 %}
 9223 
 9224 // Logical Shift Right by 8-bit immediate
 9225 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9226 %{
 9227   match(Set dst (URShiftI dst shift));
 9228   effect(KILL cr);
 9229 
 9230   format %{ "shrl    $dst, $shift" %}
 9231   ins_encode %{
 9232     __ shrl($dst$$Register, $shift$$constant);
 9233   %}
 9234   ins_pipe(ialu_reg);
 9235 %}
 9236 
 9237 // Logical Shift Right by 8-bit immediate
 9238 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9239 %{
 9240   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 9241   effect(KILL cr);
 9242 
 9243   format %{ "shrl    $dst, $shift" %}
 9244   ins_encode %{
 9245     __ shrl($dst$$Address, $shift$$constant);
 9246   %}
 9247   ins_pipe(ialu_mem_imm);
 9248 %}
 9249 
 9250 // Logical Shift Right by variable
 9251 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9252 %{
 9253   predicate(!VM_Version::supports_bmi2());
 9254   match(Set dst (URShiftI dst shift));
 9255   effect(KILL cr);
 9256 
 9257   format %{ "shrl    $dst, $shift" %}
 9258   ins_encode %{
 9259     __ shrl($dst$$Register);
 9260   %}
 9261   ins_pipe(ialu_reg_reg);
 9262 %}
 9263 
 9264 // Logical Shift Right by variable
 9265 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9266 %{
 9267   predicate(!VM_Version::supports_bmi2());
 9268   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 9269   effect(KILL cr);
 9270 
 9271   format %{ "shrl    $dst, $shift" %}
 9272   ins_encode %{
 9273     __ shrl($dst$$Address);
 9274   %}
 9275   ins_pipe(ialu_mem_reg);
 9276 %}
 9277 
 9278 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9279 %{
 9280   predicate(VM_Version::supports_bmi2());
 9281   match(Set dst (URShiftI src shift));
 9282 
 9283   format %{ "shrxl   $dst, $src, $shift" %}
 9284   ins_encode %{
 9285     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
 9286   %}
 9287   ins_pipe(ialu_reg_reg);
 9288 %}
 9289 
 9290 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9291 %{
 9292   predicate(VM_Version::supports_bmi2());
 9293   match(Set dst (URShiftI (LoadI src) shift));
 9294   ins_cost(175);
 9295   format %{ "shrxl   $dst, $src, $shift" %}
 9296   ins_encode %{
 9297     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
 9298   %}
 9299   ins_pipe(ialu_reg_mem);
 9300 %}
 9301 
 9302 // Long Shift Instructions
 9303 // Shift Left by one, two, three
 9304 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
 9305 %{
 9306   match(Set dst (LShiftL dst shift));
 9307   effect(KILL cr);
 9308 
 9309   format %{ "salq    $dst, $shift" %}
 9310   ins_encode %{
 9311     __ salq($dst$$Register, $shift$$constant);
 9312   %}
 9313   ins_pipe(ialu_reg);
 9314 %}
 9315 
 9316 // Shift Left by 8-bit immediate
 9317 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 9318 %{
 9319   match(Set dst (LShiftL dst shift));
 9320   effect(KILL cr);
 9321 
 9322   format %{ "salq    $dst, $shift" %}
 9323   ins_encode %{
 9324     __ salq($dst$$Register, $shift$$constant);
 9325   %}
 9326   ins_pipe(ialu_reg);
 9327 %}
 9328 
 9329 // Shift Left by 8-bit immediate
 9330 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9331 %{
 9332   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 9333   effect(KILL cr);
 9334 
 9335   format %{ "salq    $dst, $shift" %}
 9336   ins_encode %{
 9337     __ salq($dst$$Address, $shift$$constant);
 9338   %}
 9339   ins_pipe(ialu_mem_imm);
 9340 %}
 9341 
 9342 // Shift Left by variable
 9343 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9344 %{
 9345   predicate(!VM_Version::supports_bmi2());
 9346   match(Set dst (LShiftL dst shift));
 9347   effect(KILL cr);
 9348 
 9349   format %{ "salq    $dst, $shift" %}
 9350   ins_encode %{
 9351     __ salq($dst$$Register);
 9352   %}
 9353   ins_pipe(ialu_reg_reg);
 9354 %}
 9355 
 9356 // Shift Left by variable
 9357 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9358 %{
 9359   predicate(!VM_Version::supports_bmi2());
 9360   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 9361   effect(KILL cr);
 9362 
 9363   format %{ "salq    $dst, $shift" %}
 9364   ins_encode %{
 9365     __ salq($dst$$Address);
 9366   %}
 9367   ins_pipe(ialu_mem_reg);
 9368 %}
 9369 
 9370 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9371 %{
 9372   predicate(VM_Version::supports_bmi2());
 9373   match(Set dst (LShiftL src shift));
 9374 
 9375   format %{ "shlxq   $dst, $src, $shift" %}
 9376   ins_encode %{
 9377     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
 9378   %}
 9379   ins_pipe(ialu_reg_reg);
 9380 %}
 9381 
 9382 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9383 %{
 9384   predicate(VM_Version::supports_bmi2());
 9385   match(Set dst (LShiftL (LoadL src) shift));
 9386   ins_cost(175);
 9387   format %{ "shlxq   $dst, $src, $shift" %}
 9388   ins_encode %{
 9389     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
 9390   %}
 9391   ins_pipe(ialu_reg_mem);
 9392 %}
 9393 
// Arithmetic Shift Right by immediate (count masked to its low 6 bits)
 9395 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
 9396 %{
 9397   match(Set dst (RShiftL dst shift));
 9398   effect(KILL cr);
 9399 
 9400   format %{ "sarq    $dst, $shift" %}
 9401   ins_encode %{
 9402     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
 9403   %}
 9404   ins_pipe(ialu_mem_imm);
 9405 %}
 9406 
// Arithmetic Shift Right of memory by immediate (count masked to its low 6 bits)
 9408 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
 9409 %{
 9410   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 9411   effect(KILL cr);
 9412 
 9413   format %{ "sarq    $dst, $shift" %}
 9414   ins_encode %{
 9415     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
 9416   %}
 9417   ins_pipe(ialu_mem_imm);
 9418 %}
 9419 
 9420 // Arithmetic Shift Right by variable
 9421 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9422 %{
 9423   predicate(!VM_Version::supports_bmi2());
 9424   match(Set dst (RShiftL dst shift));
 9425   effect(KILL cr);
 9426 
 9427   format %{ "sarq    $dst, $shift" %}
 9428   ins_encode %{
 9429     __ sarq($dst$$Register);
 9430   %}
 9431   ins_pipe(ialu_reg_reg);
 9432 %}
 9433 
 9434 // Arithmetic Shift Right by variable
 9435 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9436 %{
 9437   predicate(!VM_Version::supports_bmi2());
 9438   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 9439   effect(KILL cr);
 9440 
 9441   format %{ "sarq    $dst, $shift" %}
 9442   ins_encode %{
 9443     __ sarq($dst$$Address);
 9444   %}
 9445   ins_pipe(ialu_mem_reg);
 9446 %}
 9447 
 9448 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9449 %{
 9450   predicate(VM_Version::supports_bmi2());
 9451   match(Set dst (RShiftL src shift));
 9452 
 9453   format %{ "sarxq   $dst, $src, $shift" %}
 9454   ins_encode %{
 9455     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
 9456   %}
 9457   ins_pipe(ialu_reg_reg);
 9458 %}
 9459 
 9460 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9461 %{
 9462   predicate(VM_Version::supports_bmi2());
 9463   match(Set dst (RShiftL (LoadL src) shift));
 9464   ins_cost(175);
 9465   format %{ "sarxq   $dst, $src, $shift" %}
 9466   ins_encode %{
 9467     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
 9468   %}
 9469   ins_pipe(ialu_reg_mem);
 9470 %}
 9471 
 9472 // Logical Shift Right by 8-bit immediate
 9473 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 9474 %{
 9475   match(Set dst (URShiftL dst shift));
 9476   effect(KILL cr);
 9477 
 9478   format %{ "shrq    $dst, $shift" %}
 9479   ins_encode %{
 9480     __ shrq($dst$$Register, $shift$$constant);
 9481   %}
 9482   ins_pipe(ialu_reg);
 9483 %}
 9484 
 9485 // Logical Shift Right by 8-bit immediate
 9486 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9487 %{
 9488   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 9489   effect(KILL cr);
 9490 
 9491   format %{ "shrq    $dst, $shift" %}
 9492   ins_encode %{
 9493     __ shrq($dst$$Address, $shift$$constant);
 9494   %}
 9495   ins_pipe(ialu_mem_imm);
 9496 %}
 9497 
 9498 // Logical Shift Right by variable
 9499 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9500 %{
 9501   predicate(!VM_Version::supports_bmi2());
 9502   match(Set dst (URShiftL dst shift));
 9503   effect(KILL cr);
 9504 
 9505   format %{ "shrq    $dst, $shift" %}
 9506   ins_encode %{
 9507     __ shrq($dst$$Register);
 9508   %}
 9509   ins_pipe(ialu_reg_reg);
 9510 %}
 9511 
 9512 // Logical Shift Right by variable
 9513 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9514 %{
 9515   predicate(!VM_Version::supports_bmi2());
 9516   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 9517   effect(KILL cr);
 9518 
 9519   format %{ "shrq    $dst, $shift" %}
 9520   ins_encode %{
 9521     __ shrq($dst$$Address);
 9522   %}
 9523   ins_pipe(ialu_mem_reg);
 9524 %}
 9525 
 9526 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9527 %{
 9528   predicate(VM_Version::supports_bmi2());
 9529   match(Set dst (URShiftL src shift));
 9530 
 9531   format %{ "shrxq   $dst, $src, $shift" %}
 9532   ins_encode %{
 9533     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
 9534   %}
 9535   ins_pipe(ialu_reg_reg);
 9536 %}
 9537 
 9538 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9539 %{
 9540   predicate(VM_Version::supports_bmi2());
 9541   match(Set dst (URShiftL (LoadL src) shift));
 9542   ins_cost(175);
 9543   format %{ "shrxq   $dst, $src, $shift" %}
 9544   ins_encode %{
 9545     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
 9546   %}
 9547   ins_pipe(ialu_reg_mem);
 9548 %}
 9549 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
 9552 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
 9553 %{
 9554   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 9555 
 9556   format %{ "movsbl  $dst, $src\t# i2b" %}
 9557   ins_encode %{
 9558     __ movsbl($dst$$Register, $src$$Register);
 9559   %}
 9560   ins_pipe(ialu_reg_reg);
 9561 %}
 9562 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 9565 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
 9566 %{
 9567   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 9568 
 9569   format %{ "movswl  $dst, $src\t# i2s" %}
 9570   ins_encode %{
 9571     __ movswl($dst$$Register, $src$$Register);
 9572   %}
 9573   ins_pipe(ialu_reg_reg);
 9574 %}
 9575 
 9576 // ROL/ROR instructions
 9577 
 9578 // Rotate left by constant.
 9579 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
 9580 %{
 9581   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9582   match(Set dst (RotateLeft dst shift));
 9583   effect(KILL cr);
 9584   format %{ "roll    $dst, $shift" %}
 9585   ins_encode %{
 9586     __ roll($dst$$Register, $shift$$constant);
 9587   %}
 9588   ins_pipe(ialu_reg);
 9589 %}
 9590 
 9591 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
 9592 %{
 9593   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9594   match(Set dst (RotateLeft src shift));
 9595   format %{ "rolxl   $dst, $src, $shift" %}
 9596   ins_encode %{
 9597     int shift = 32 - ($shift$$constant & 31);
 9598     __ rorxl($dst$$Register, $src$$Register, shift);
 9599   %}
 9600   ins_pipe(ialu_reg_reg);
 9601 %}
 9602 
 9603 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
 9604 %{
 9605   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9606   match(Set dst (RotateLeft (LoadI src) shift));
 9607   ins_cost(175);
 9608   format %{ "rolxl   $dst, $src, $shift" %}
 9609   ins_encode %{
 9610     int shift = 32 - ($shift$$constant & 31);
 9611     __ rorxl($dst$$Register, $src$$Address, shift);
 9612   %}
 9613   ins_pipe(ialu_reg_mem);
 9614 %}
 9615 
 9616 // Rotate Left by variable
      // Int form, no BMI2 requirement. The count operand is constrained to the
      // rcx_RegI class; the emitted roll names only $dst (presumably the count
      // is taken implicitly from CL - confirm against the assembler).
 9617 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9618 %{
 9619   predicate(n->bottom_type()->basic_type() == T_INT);
 9620   match(Set dst (RotateLeft dst shift));
 9621   effect(KILL cr);
 9622   format %{ "roll    $dst, $shift" %}
 9623   ins_encode %{
 9624     __ roll($dst$$Register);
 9625   %}
 9626   ins_pipe(ialu_reg_reg);
 9627 %}
 9628 
 9629 // Rotate Right by constant.
      // Legacy int form: selected only when BMI2 is not supported (see predicate).
      // Rotates dst in place; the flags register is declared killed.
 9630 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
 9631 %{
 9632   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9633   match(Set dst (RotateRight dst shift));
 9634   effect(KILL cr);
 9635   format %{ "rorl    $dst, $shift" %}
 9636   ins_encode %{
 9637     __ rorl($dst$$Register, $shift$$constant);
 9638   %}
 9639   ins_pipe(ialu_reg);
 9640 %}
 9641 
 9642 // Rotate Right by constant.
      // BMI2 int form: rorxl with separate src and dst operands; no flags
      // operand is declared for this rule.
 9643 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
 9644 %{
 9645   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9646   match(Set dst (RotateRight src shift));
 9647   format %{ "rorxl   $dst, $src, $shift" %}
 9648   ins_encode %{
 9649     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
 9650   %}
 9651   ins_pipe(ialu_reg_reg);
 9652 %}
 9653 
 9654 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
 9655 %{
        // BMI2 int rotate-right by constant where the rotated value is a LoadI
        // folded into the instruction as a memory operand.
 9656   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9657   match(Set dst (RotateRight (LoadI src) shift));
 9658   ins_cost(175);
 9659   format %{ "rorxl   $dst, $src, $shift" %}
 9660   ins_encode %{
 9661     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
 9662   %}
 9663   ins_pipe(ialu_reg_mem);
 9664 %}
 9665 
 9666 // Rotate Right by variable
      // Int form, no BMI2 requirement. The count operand is constrained to the
      // rcx_RegI class; the emitted rorl names only $dst (presumably the count
      // is taken implicitly from CL - confirm against the assembler).
 9667 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9668 %{
 9669   predicate(n->bottom_type()->basic_type() == T_INT);
 9670   match(Set dst (RotateRight dst shift));
 9671   effect(KILL cr);
 9672   format %{ "rorl    $dst, $shift" %}
 9673   ins_encode %{
 9674     __ rorl($dst$$Register);
 9675   %}
 9676   ins_pipe(ialu_reg_reg);
 9677 %}
 9678 
 9679 // Rotate Left by constant.
      // Legacy long form: selected only when BMI2 is not supported (see predicate).
      // Rotates dst in place; the flags register is declared killed.
 9680 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
 9681 %{
 9682   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9683   match(Set dst (RotateLeft dst shift));
 9684   effect(KILL cr);
 9685   format %{ "rolq    $dst, $shift" %}
 9686   ins_encode %{
 9687     __ rolq($dst$$Register, $shift$$constant);
 9688   %}
 9689   ins_pipe(ialu_reg);
 9690 %}
 9691 
 9692 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
 9693 %{
        // BMI2 long rotate-left by constant with separate src and dst operands;
        // no flags operand is declared for this rule.
 9694   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9695   match(Set dst (RotateLeft src shift));
        // "rolxq" is only the printed name; the encoding below emits rorxq.
 9696   format %{ "rolxq   $dst, $src, $shift" %}
 9697   ins_encode %{
          // A left rotate by s is encoded as a right rotate by 64 - (s & 63).
          // NOTE(review): a masked count of 0 yields 64 here - confirm that is
          // acceptable to the assembler/instruction.
 9698     int shift = 64 - ($shift$$constant & 63);
 9699     __ rorxq($dst$$Register, $src$$Register, shift);
 9700   %}
 9701   ins_pipe(ialu_reg_reg);
 9702 %}
 9703 
 9704 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
 9705 %{
        // BMI2 long rotate-left by constant where the rotated value is a LoadL
        // folded into the instruction as a memory operand.
 9706   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9707   match(Set dst (RotateLeft (LoadL src) shift));
 9708   ins_cost(175);
        // "rolxq" is only the printed name; the encoding below emits rorxq.
 9709   format %{ "rolxq   $dst, $src, $shift" %}
 9710   ins_encode %{
          // A left rotate by s is encoded as a right rotate by 64 - (s & 63).
 9711     int shift = 64 - ($shift$$constant & 63);
 9712     __ rorxq($dst$$Register, $src$$Address, shift);
 9713   %}
 9714   ins_pipe(ialu_reg_mem);
 9715 %}
 9716 
 9717 // Rotate Left by variable
      // Long form, no BMI2 requirement. The count operand is constrained to the
      // rcx_RegI class; the emitted rolq names only $dst (presumably the count
      // is taken implicitly from CL - confirm against the assembler).
 9718 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9719 %{
 9720   predicate(n->bottom_type()->basic_type() == T_LONG);
 9721   match(Set dst (RotateLeft dst shift));
 9722   effect(KILL cr);
 9723   format %{ "rolq    $dst, $shift" %}
 9724   ins_encode %{
 9725     __ rolq($dst$$Register);
 9726   %}
 9727   ins_pipe(ialu_reg_reg);
 9728 %}
 9729 
 9730 // Rotate Right by constant.
      // Legacy long form: selected only when BMI2 is not supported (see predicate).
      // Rotates dst in place; the flags register is declared killed.
 9731 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
 9732 %{
 9733   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9734   match(Set dst (RotateRight dst shift));
 9735   effect(KILL cr);
 9736   format %{ "rorq    $dst, $shift" %}
 9737   ins_encode %{
 9738     __ rorq($dst$$Register, $shift$$constant);
 9739   %}
 9740   ins_pipe(ialu_reg);
 9741 %}
 9742 
 9743 // Rotate Right by constant
      // BMI2 long form: rorxq with separate src and dst operands; no flags
      // operand is declared for this rule.
 9744 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
 9745 %{
 9746   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9747   match(Set dst (RotateRight src shift));
 9748   format %{ "rorxq   $dst, $src, $shift" %}
 9749   ins_encode %{
 9750     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
 9751   %}
 9752   ins_pipe(ialu_reg_reg);
 9753 %}
 9754 
 9755 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
 9756 %{
        // BMI2 long rotate-right by constant where the rotated value is a LoadL
        // folded into the instruction as a memory operand.
 9757   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9758   match(Set dst (RotateRight (LoadL src) shift));
 9759   ins_cost(175);
 9760   format %{ "rorxq   $dst, $src, $shift" %}
 9761   ins_encode %{
 9762     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
 9763   %}
 9764   ins_pipe(ialu_reg_mem);
 9765 %}
 9766 
 9767 // Rotate Right by variable
      // Long form, no BMI2 requirement. The count operand is constrained to the
      // rcx_RegI class; the emitted rorq names only $dst (presumably the count
      // is taken implicitly from CL - confirm against the assembler).
 9768 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9769 %{
 9770   predicate(n->bottom_type()->basic_type() == T_LONG);
 9771   match(Set dst (RotateRight dst shift));
 9772   effect(KILL cr);
 9773   format %{ "rorq    $dst, $shift" %}
 9774   ins_encode %{
 9775     __ rorq($dst$$Register);
 9776   %}
 9777   ins_pipe(ialu_reg_reg);
 9778 %}
 9779 
 9780 //----------------------------- CompressBits/ExpandBits ------------------------
 9781 
 9782 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
        // Long CompressBits with the mask in a register, emitted as pextq.
        // Only nodes whose bottom type is long are accepted.
 9783   predicate(n->bottom_type()->isa_long());
 9784   match(Set dst (CompressBits src mask));
 9785   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
 9786   ins_encode %{
 9787     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
 9788   %}
 9789   ins_pipe( pipe_slow );
 9790 %}
 9791 
 9792 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
        // Long ExpandBits with the mask in a register, emitted as pdepq.
        // Only nodes whose bottom type is long are accepted.
 9793   predicate(n->bottom_type()->isa_long());
 9794   match(Set dst (ExpandBits src mask));
 9795   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
 9796   ins_encode %{
 9797     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
 9798   %}
 9799   ins_pipe( pipe_slow );
 9800 %}
 9801 
 9802 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
        // Long CompressBits where the mask is a LoadL folded into pextq as a
        // memory operand. Only nodes whose bottom type is long are accepted.
 9803   predicate(n->bottom_type()->isa_long());
 9804   match(Set dst (CompressBits src (LoadL mask)));
 9805   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
 9806   ins_encode %{
 9807     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
 9808   %}
 9809   ins_pipe( pipe_slow );
 9810 %}
 9811 
 9812 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
        // Long ExpandBits where the mask is a LoadL folded into pdepq as a
        // memory operand. Only nodes whose bottom type is long are accepted.
 9813   predicate(n->bottom_type()->isa_long());
 9814   match(Set dst (ExpandBits src (LoadL mask)));
 9815   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
 9816   ins_encode %{
 9817     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
 9818   %}
 9819   ins_pipe( pipe_slow );
 9820 %}
 9821 
 9822 
 9823 // Logical Instructions
 9824 
 9825 // Integer Logical Instructions
 9826 
 9827 // And Instructions
 9828 // And Register with Register
      // Two-operand int AND: dst is both an input and the result; the flags
      // register is declared killed.
 9829 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9830 %{
 9831   match(Set dst (AndI dst src));
 9832   effect(KILL cr);
 9833 
 9834   format %{ "andl    $dst, $src\t# int" %}
 9835   ins_encode %{
 9836     __ andl($dst$$Register, $src$$Register);
 9837   %}
 9838   ins_pipe(ialu_reg_reg);
 9839 %}
 9840 
 9841 // And Register with Immediate 255
      // Emitted as a zero-extending byte move (movzbl) rather than an AND;
      // no flags operand is declared and src is left untouched.
 9842 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
 9843 %{
 9844   match(Set dst (AndI src mask));
 9845 
 9846   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
 9847   ins_encode %{
 9848     __ movzbl($dst$$Register, $src$$Register);
 9849   %}
 9850   ins_pipe(ialu_reg);
 9851 %}
 9852 
 9853 // And Register with Immediate 255 and promote to long
      // Folds the ConvI2L into the same movzbl used for the int-only case;
      // no separate widening instruction is emitted.
 9854 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
 9855 %{
 9856   match(Set dst (ConvI2L (AndI src mask)));
 9857 
 9858   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
 9859   ins_encode %{
 9860     __ movzbl($dst$$Register, $src$$Register);
 9861   %}
 9862   ins_pipe(ialu_reg);
 9863 %}
 9864 
 9865 // And Register with Immediate 65535
      // Emitted as a zero-extending 16-bit move (movzwl) rather than an AND;
      // no flags operand is declared and src is left untouched.
 9866 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
 9867 %{
 9868   match(Set dst (AndI src mask));
 9869 
 9870   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
 9871   ins_encode %{
 9872     __ movzwl($dst$$Register, $src$$Register);
 9873   %}
 9874   ins_pipe(ialu_reg);
 9875 %}
 9876 
 9877 // And Register with Immediate 65535 and promote to long
      // Folds the ConvI2L into the same movzwl used for the int-only case;
      // no separate widening instruction is emitted.
 9878 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
 9879 %{
 9880   match(Set dst (ConvI2L (AndI src mask)));
 9881 
 9882   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
 9883   ins_encode %{
 9884     __ movzwl($dst$$Register, $src$$Register);
 9885   %}
 9886   ins_pipe(ialu_reg);
 9887 %}
 9888 
 9889 // Can skip int2long conversions after AND with small bitmask
      // BMI2-only rule for ConvI2L(AndI(src, mask)) with an immI_Pow2M1 mask.
      // Two instructions are emitted (movl into tmp, then bzhiq); the format
      // string only prints the bzhiq.
 9890 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
 9891 %{
 9892   predicate(VM_Version::supports_bmi2());
 9893   ins_cost(125);
 9894   effect(TEMP tmp, KILL cr);
 9895   match(Set dst (ConvI2L (AndI src mask)));
 9896   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
 9897   ins_encode %{
          // tmp receives log2(mask + 1), which is then passed to bzhiq as its
          // register operand in place of the mask constant.
 9898     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
 9899     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
 9900   %}
 9901   ins_pipe(ialu_reg_reg);
 9902 %}
 9903 
 9904 // And Register with Immediate
      // Two-operand int AND of dst with an immI constant; dst is updated in
      // place and the flags register is declared killed.
 9905 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9906 %{
 9907   match(Set dst (AndI dst src));
 9908   effect(KILL cr);
 9909 
 9910   format %{ "andl    $dst, $src\t# int" %}
 9911   ins_encode %{
 9912     __ andl($dst$$Register, $src$$constant);
 9913   %}
 9914   ins_pipe(ialu_reg);
 9915 %}
 9916 
 9917 // And Register with Memory
      // Folds a LoadI into the AND as a memory source operand; dst is updated
      // in place and the flags register is declared killed.
 9918 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9919 %{
 9920   match(Set dst (AndI dst (LoadI src)));
 9921   effect(KILL cr);
 9922 
 9923   ins_cost(150);
 9924   format %{ "andl    $dst, $src\t# int" %}
 9925   ins_encode %{
 9926     __ andl($dst$$Register, $src$$Address);
 9927   %}
 9928   ins_pipe(ialu_reg_mem);
 9929 %}
 9930 
 9931 // And Memory with Register
      // Byte read-modify-write: matches a StoreB to dst of (LoadB dst) & src
      // and emits a single memory-destination andb.
 9932 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9933 %{
 9934   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
 9935   effect(KILL cr);
 9936 
 9937   ins_cost(150);
 9938   format %{ "andb    $dst, $src\t# byte" %}
 9939   ins_encode %{
 9940     __ andb($dst$$Address, $src$$Register);
 9941   %}
 9942   ins_pipe(ialu_mem_reg);
 9943 %}
 9944 
 9945 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9946 %{
        // Int read-modify-write: matches a StoreI to dst of (LoadI dst) & src
        // and emits a single memory-destination andl.
 9947   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 9948   effect(KILL cr);
 9949 
 9950   ins_cost(150);
 9951   format %{ "andl    $dst, $src\t# int" %}
 9952   ins_encode %{
 9953     __ andl($dst$$Address, $src$$Register);
 9954   %}
 9955   ins_pipe(ialu_mem_reg);
 9956 %}
 9957 
 9958 // And Memory with Immediate
      // Int read-modify-write with an immI constant: same tree shape as the
      // register variant, distinguished by operand type and a lower cost (125).
 9959 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9960 %{
 9961   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 9962   effect(KILL cr);
 9963 
 9964   ins_cost(125);
 9965   format %{ "andl    $dst, $src\t# int" %}
 9966   ins_encode %{
 9967     __ andl($dst$$Address, $src$$constant);
 9968   %}
 9969   ins_pipe(ialu_mem_imm);
 9970 %}
 9971 
 9972 // BMI1 instructions
 9973 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
        // Int and-not with a memory second operand: matches
        // (src1 ^ minus_1) & LoadI(src2) and emits one andnl.
        // Enabled only under UseBMI1Instructions.
 9974   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
 9975   predicate(UseBMI1Instructions);
 9976   effect(KILL cr);
 9977 
 9978   ins_cost(125);
 9979   format %{ "andnl  $dst, $src1, $src2" %}
 9980 
 9981   ins_encode %{
 9982     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 9983   %}
 9984   ins_pipe(ialu_reg_mem);
 9985 %}
 9986 
 9987 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
        // Int and-not, all-register form: matches (src1 ^ minus_1) & src2 and
        // emits one andnl. Enabled only under UseBMI1Instructions.
 9988   match(Set dst (AndI (XorI src1 minus_1) src2));
 9989   predicate(UseBMI1Instructions);
 9990   effect(KILL cr);
 9991 
 9992   format %{ "andnl  $dst, $src1, $src2" %}
 9993 
 9994   ins_encode %{
 9995     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 9996   %}
 9997   ins_pipe(ialu_reg);
 9998 %}
 9999 
10000 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
        // Int register form: matches (imm_zero - src) & src and emits one
        // blsil. Enabled only under UseBMI1Instructions.
10001   match(Set dst (AndI (SubI imm_zero src) src));
10002   predicate(UseBMI1Instructions);
10003   effect(KILL cr);
10004 
10005   format %{ "blsil  $dst, $src" %}
10006 
10007   ins_encode %{
10008     __ blsil($dst$$Register, $src$$Register);
10009   %}
10010   ins_pipe(ialu_reg);
10011 %}
10012 
10013 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
        // Int memory form: both occurrences of the value are a LoadI of the
        // same src operand, folded into one blsil with a memory source.
        // Enabled only under UseBMI1Instructions.
10014   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
10015   predicate(UseBMI1Instructions);
10016   effect(KILL cr);
10017 
10018   ins_cost(125);
10019   format %{ "blsil  $dst, $src" %}
10020 
10021   ins_encode %{
10022     __ blsil($dst$$Register, $src$$Address);
10023   %}
10024   ins_pipe(ialu_reg_mem);
10025 %}
10026 
10027 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
10028 %{
        // Int memory form: matches (LoadI(src) + minus_1) ^ LoadI(src) and
        // emits one blsmskl with a memory source.
        // Enabled only under UseBMI1Instructions.
10029   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
10030   predicate(UseBMI1Instructions);
10031   effect(KILL cr);
10032 
10033   ins_cost(125);
10034   format %{ "blsmskl $dst, $src" %}
10035 
10036   ins_encode %{
10037     __ blsmskl($dst$$Register, $src$$Address);
10038   %}
10039   ins_pipe(ialu_reg_mem);
10040 %}
10041 
10042 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
10043 %{
        // Int register form: matches (src + minus_1) ^ src and emits one
        // blsmskl. Enabled only under UseBMI1Instructions.
10044   match(Set dst (XorI (AddI src minus_1) src));
10045   predicate(UseBMI1Instructions);
10046   effect(KILL cr);
10047 
10048   format %{ "blsmskl $dst, $src" %}
10049 
10050   ins_encode %{
10051     __ blsmskl($dst$$Register, $src$$Register);
10052   %}
10053 
10054   ins_pipe(ialu_reg);
10055 %}
10056 
10057 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
10058 %{
        // Int register form: matches (src + minus_1) & src and emits one
        // blsrl. Enabled only under UseBMI1Instructions; the flags register
        // is declared killed.
10059   match(Set dst (AndI (AddI src minus_1) src) );
10060   predicate(UseBMI1Instructions);
10061   effect(KILL cr);
10062 
10063   format %{ "blsrl  $dst, $src" %}
10064 
10065   ins_encode %{
10066     __ blsrl($dst$$Register, $src$$Register);
10067   %}
10068 
        // Register-only operands: use the register pipeline class, as the
        // other register-form BMI1 rules (blsil, blsmskl) do.
10069   ins_pipe(ialu_reg);
10070 %}
10071 
10072 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
10073 %{
        // Int memory form: matches (LoadI(src) + minus_1) & LoadI(src) and
        // emits one blsrl with a memory source. Enabled only under
        // UseBMI1Instructions; the flags register is declared killed.
10074   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
10075   predicate(UseBMI1Instructions);
10076   effect(KILL cr);
10077 
10078   ins_cost(125);
10079   format %{ "blsrl  $dst, $src" %}
10080 
10081   ins_encode %{
10082     __ blsrl($dst$$Register, $src$$Address);
10083   %}
10084 
        // Memory source operand: use the register-memory pipeline class, as
        // the other memory-form BMI1 rules (blsil, blsmskl) do.
10085   ins_pipe(ialu_reg_mem);
10086 %}
10087 
10088 // Or Instructions
10089 // Or Register with Register
      // Two-operand int OR: dst is both an input and the result; the flags
      // register is declared killed.
10090 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10091 %{
10092   match(Set dst (OrI dst src));
10093   effect(KILL cr);
10094 
10095   format %{ "orl     $dst, $src\t# int" %}
10096   ins_encode %{
10097     __ orl($dst$$Register, $src$$Register);
10098   %}
10099   ins_pipe(ialu_reg_reg);
10100 %}
10101 
10102 // Or Register with Immediate
      // Two-operand int OR of dst with an immI constant; dst is updated in
      // place and the flags register is declared killed.
10103 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10104 %{
10105   match(Set dst (OrI dst src));
10106   effect(KILL cr);
10107 
10108   format %{ "orl     $dst, $src\t# int" %}
10109   ins_encode %{
10110     __ orl($dst$$Register, $src$$constant);
10111   %}
10112   ins_pipe(ialu_reg);
10113 %}
10114 
10115 // Or Register with Memory
      // Folds a LoadI into the OR as a memory source operand; dst is updated
      // in place and the flags register is declared killed.
10116 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10117 %{
10118   match(Set dst (OrI dst (LoadI src)));
10119   effect(KILL cr);
10120 
10121   ins_cost(150);
10122   format %{ "orl     $dst, $src\t# int" %}
10123   ins_encode %{
10124     __ orl($dst$$Register, $src$$Address);
10125   %}
10126   ins_pipe(ialu_reg_mem);
10127 %}
10128 
10129 // Or Memory with Register
      // Byte read-modify-write: matches a StoreB to dst of (LoadB dst) | src
      // and emits a single memory-destination orb.
10130 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10131 %{
10132   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
10133   effect(KILL cr);
10134 
10135   ins_cost(150);
10136   format %{ "orb    $dst, $src\t# byte" %}
10137   ins_encode %{
10138     __ orb($dst$$Address, $src$$Register);
10139   %}
10140   ins_pipe(ialu_mem_reg);
10141 %}
10142 
10143 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10144 %{
        // Int read-modify-write: matches a StoreI to dst of (LoadI dst) | src
        // and emits a single memory-destination orl.
10145   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
10146   effect(KILL cr);
10147 
10148   ins_cost(150);
10149   format %{ "orl     $dst, $src\t# int" %}
10150   ins_encode %{
10151     __ orl($dst$$Address, $src$$Register);
10152   %}
10153   ins_pipe(ialu_mem_reg);
10154 %}
10155 
10156 // Or Memory with Immediate
      // Int read-modify-write with an immI constant: same tree shape as the
      // register variant, distinguished by operand type and a lower cost (125).
10157 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
10158 %{
10159   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
10160   effect(KILL cr);
10161 
10162   ins_cost(125);
10163   format %{ "orl     $dst, $src\t# int" %}
10164   ins_encode %{
10165     __ orl($dst$$Address, $src$$constant);
10166   %}
10167   ins_pipe(ialu_mem_imm);
10168 %}
10169 
10170 // Xor Instructions
10171 // Xor Register with Register
      // Two-operand int XOR: dst is both an input and the result; the flags
      // register is declared killed.
10172 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10173 %{
10174   match(Set dst (XorI dst src));
10175   effect(KILL cr);
10176 
10177   format %{ "xorl    $dst, $src\t# int" %}
10178   ins_encode %{
10179     __ xorl($dst$$Register, $src$$Register);
10180   %}
10181   ins_pipe(ialu_reg_reg);
10182 %}
10183 
10184 // Xor Register with Immediate -1
      // Emitted as notl on dst instead of an XOR; unlike the other XOR rules,
      // no flags operand is declared. The format prints "not" while the
      // encoding calls notl.
10185 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
10186   match(Set dst (XorI dst imm));
10187 
10188   format %{ "not    $dst" %}
10189   ins_encode %{
10190      __ notl($dst$$Register);
10191   %}
10192   ins_pipe(ialu_reg);
10193 %}
10194 
10195 // Xor Register with Immediate
      // Two-operand int XOR of dst with an immI constant; dst is updated in
      // place and the flags register is declared killed.
10196 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10197 %{
10198   match(Set dst (XorI dst src));
10199   effect(KILL cr);
10200 
10201   format %{ "xorl    $dst, $src\t# int" %}
10202   ins_encode %{
10203     __ xorl($dst$$Register, $src$$constant);
10204   %}
10205   ins_pipe(ialu_reg);
10206 %}
10207 
10208 // Xor Register with Memory
      // Folds a LoadI into the XOR as a memory source operand; dst is updated
      // in place and the flags register is declared killed.
10209 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10210 %{
10211   match(Set dst (XorI dst (LoadI src)));
10212   effect(KILL cr);
10213 
10214   ins_cost(150);
10215   format %{ "xorl    $dst, $src\t# int" %}
10216   ins_encode %{
10217     __ xorl($dst$$Register, $src$$Address);
10218   %}
10219   ins_pipe(ialu_reg_mem);
10220 %}
10221 
10222 // Xor Memory with Register
      // Byte read-modify-write: matches a StoreB to dst of (LoadB dst) ^ src
      // and emits a single memory-destination xorb.
10223 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10224 %{
10225   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
10226   effect(KILL cr);
10227 
10228   ins_cost(150);
10229   format %{ "xorb    $dst, $src\t# byte" %}
10230   ins_encode %{
10231     __ xorb($dst$$Address, $src$$Register);
10232   %}
10233   ins_pipe(ialu_mem_reg);
10234 %}
10235 
10236 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10237 %{
        // Int read-modify-write: matches a StoreI to dst of (LoadI dst) ^ src
        // and emits a single memory-destination xorl.
10238   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10239   effect(KILL cr);
10240 
10241   ins_cost(150);
10242   format %{ "xorl    $dst, $src\t# int" %}
10243   ins_encode %{
10244     __ xorl($dst$$Address, $src$$Register);
10245   %}
10246   ins_pipe(ialu_mem_reg);
10247 %}
10248 
10249 // Xor Memory with Immediate
      // Int read-modify-write with an immI constant: same tree shape as the
      // register variant, distinguished by operand type and a lower cost (125).
10250 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
10251 %{
10252   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10253   effect(KILL cr);
10254 
10255   ins_cost(125);
10256   format %{ "xorl    $dst, $src\t# int" %}
10257   ins_encode %{
10258     __ xorl($dst$$Address, $src$$constant);
10259   %}
10260   ins_pipe(ialu_mem_imm);
10261 %}
10262 
10263 
10264 // Long Logical Instructions
10265 
10266 // And Instructions
10267 // And Register with Register
      // Two-operand long AND: dst is both an input and the result; the flags
      // register is declared killed.
10268 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10269 %{
10270   match(Set dst (AndL dst src));
10271   effect(KILL cr);
10272 
10273   format %{ "andq    $dst, $src\t# long" %}
10274   ins_encode %{
10275     __ andq($dst$$Register, $src$$Register);
10276   %}
10277   ins_pipe(ialu_reg_reg);
10278 %}
10279 
10280 // And Register with Immediate 255
      // Long variant, emitted as the 32-bit movzbl rather than an AND; no
      // flags operand is declared and src is left untouched.
10281 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
10282 %{
10283   match(Set dst (AndL src mask));
10284 
10285   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
10286   ins_encode %{
10287     // movzbl zeroes out the upper 32-bit and does not need REX.W
10288     __ movzbl($dst$$Register, $src$$Register);
10289   %}
10290   ins_pipe(ialu_reg);
10291 %}
10292 
10293 // And Register with Immediate 65535
      // Long variant, emitted as the 32-bit movzwl rather than an AND; no
      // flags operand is declared and src is left untouched.
10294 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
10295 %{
10296   match(Set dst (AndL src mask));
10297 
10298   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
10299   ins_encode %{
10300     // movzwl zeroes out the upper 32-bit and does not need REX.W
10301     __ movzwl($dst$$Register, $src$$Register);
10302   %}
10303   ins_pipe(ialu_reg);
10304 %}
10305 
10306 // And Register with Immediate
      // Two-operand long AND of dst with an immL32 constant; dst is updated
      // in place and the flags register is declared killed.
10307 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10308 %{
10309   match(Set dst (AndL dst src));
10310   effect(KILL cr);
10311 
10312   format %{ "andq    $dst, $src\t# long" %}
10313   ins_encode %{
10314     __ andq($dst$$Register, $src$$constant);
10315   %}
10316   ins_pipe(ialu_reg);
10317 %}
10318 
10319 // And Register with Memory
      // Folds a LoadL into the AND as a memory source operand; dst is updated
      // in place and the flags register is declared killed.
10320 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10321 %{
10322   match(Set dst (AndL dst (LoadL src)));
10323   effect(KILL cr);
10324 
10325   ins_cost(150);
10326   format %{ "andq    $dst, $src\t# long" %}
10327   ins_encode %{
10328     __ andq($dst$$Register, $src$$Address);
10329   %}
10330   ins_pipe(ialu_reg_mem);
10331 %}
10332 
10333 // And Memory with Register
      // Long read-modify-write: matches a StoreL to dst of (LoadL dst) & src
      // and emits a single memory-destination andq.
10334 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10335 %{
10336   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10337   effect(KILL cr);
10338 
10339   ins_cost(150);
10340   format %{ "andq    $dst, $src\t# long" %}
10341   ins_encode %{
10342     __ andq($dst$$Address, $src$$Register);
10343   %}
10344   ins_pipe(ialu_mem_reg);
10345 %}
10346 
10347 // And Memory with Immediate
      // Long read-modify-write with an immL32 constant: same tree shape as
      // the register variant, distinguished by operand type and a lower
      // cost (125).
10348 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10349 %{
10350   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10351   effect(KILL cr);
10352 
10353   ins_cost(125);
10354   format %{ "andq    $dst, $src\t# long" %}
10355   ins_encode %{
10356     __ andq($dst$$Address, $src$$constant);
10357   %}
10358   ins_pipe(ialu_mem_imm);
10359 %}
10360 
10361 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
10362 %{
        // Clears a single bit of a long in memory: an AND with a constant
        // whose complement is a power of two is emitted as btrq on that bit.
10363   // con should be a pure 64-bit immediate given that not(con) is a power of 2
10364   // because AND/OR works well enough for 8/32-bit values.
        // The rule applies only when the bit index log2(~con) is above 30.
        // NOTE(review): n->in(3)->in(2) is presumably the constant input of
        // the stored AndL value - confirm against the StoreL node layout.
10365   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
10366 
10367   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
10368   effect(KILL cr);
10369 
10370   ins_cost(125);
10371   format %{ "btrq    $dst, log2(not($con))\t# long" %}
10372   ins_encode %{
10373     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
10374   %}
10375   ins_pipe(ialu_mem_imm);
10376 %}
10377 
10378 // BMI1 instructions
10379 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
        // Long and-not with a memory second operand: matches
        // (src1 ^ minus_1) & LoadL(src2) and emits one andnq.
        // Enabled only under UseBMI1Instructions.
10380   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
10381   predicate(UseBMI1Instructions);
10382   effect(KILL cr);
10383 
10384   ins_cost(125);
10385   format %{ "andnq  $dst, $src1, $src2" %}
10386 
10387   ins_encode %{
10388     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
10389   %}
10390   ins_pipe(ialu_reg_mem);
10391 %}
10392 
10393 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
        // Long and-not, all-register form: matches (src1 ^ minus_1) & src2
        // and emits one andnq. Enabled only under UseBMI1Instructions; the
        // flags register is declared killed.
10394   match(Set dst (AndL (XorL src1 minus_1) src2));
10395   predicate(UseBMI1Instructions);
10396   effect(KILL cr);
10397 
10398   format %{ "andnq  $dst, $src1, $src2" %}
10399 
10400   ins_encode %{
10401     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
10402   %}
        // No memory operand here: use the register pipeline class, matching
        // the int all-register and-not rule.
10403   ins_pipe(ialu_reg);
10404 %}
10405 
10406 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
        // Long register form: matches (imm_zero - src) & src and emits one
        // blsiq. Enabled only under UseBMI1Instructions.
10407   match(Set dst (AndL (SubL imm_zero src) src));
10408   predicate(UseBMI1Instructions);
10409   effect(KILL cr);
10410 
10411   format %{ "blsiq  $dst, $src" %}
10412 
10413   ins_encode %{
10414     __ blsiq($dst$$Register, $src$$Register);
10415   %}
10416   ins_pipe(ialu_reg);
10417 %}
10418 
10419 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
        // Long memory form: both occurrences of the value are a LoadL of the
        // same src operand, folded into one blsiq with a memory source.
        // Enabled only under UseBMI1Instructions.
10420   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
10421   predicate(UseBMI1Instructions);
10422   effect(KILL cr);
10423 
10424   ins_cost(125);
10425   format %{ "blsiq  $dst, $src" %}
10426 
10427   ins_encode %{
10428     __ blsiq($dst$$Register, $src$$Address);
10429   %}
10430   ins_pipe(ialu_reg_mem);
10431 %}
10432 
10433 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
10434 %{
        // Long memory form: matches (LoadL(src) + minus_1) ^ LoadL(src) and
        // emits one blsmskq with a memory source.
        // Enabled only under UseBMI1Instructions.
10435   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
10436   predicate(UseBMI1Instructions);
10437   effect(KILL cr);
10438 
10439   ins_cost(125);
10440   format %{ "blsmskq $dst, $src" %}
10441 
10442   ins_encode %{
10443     __ blsmskq($dst$$Register, $src$$Address);
10444   %}
10445   ins_pipe(ialu_reg_mem);
10446 %}
10447 
10448 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
10449 %{
        // Long register form: matches (src + minus_1) ^ src and emits one
        // blsmskq. Enabled only under UseBMI1Instructions.
10450   match(Set dst (XorL (AddL src minus_1) src));
10451   predicate(UseBMI1Instructions);
10452   effect(KILL cr);
10453 
10454   format %{ "blsmskq $dst, $src" %}
10455 
10456   ins_encode %{
10457     __ blsmskq($dst$$Register, $src$$Register);
10458   %}
10459 
10460   ins_pipe(ialu_reg);
10461 %}
10462 
10463 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
10464 %{
        // Long register form: matches (src + minus_1) & src and emits one
        // blsrq. Enabled only under UseBMI1Instructions.
10465   match(Set dst (AndL (AddL src minus_1) src) );
10466   predicate(UseBMI1Instructions);
10467   effect(KILL cr);
10468 
10469   format %{ "blsrq  $dst, $src" %}
10470 
10471   ins_encode %{
10472     __ blsrq($dst$$Register, $src$$Register);
10473   %}
10474 
10475   ins_pipe(ialu_reg);
10476 %}
10477 
10478 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
10479 %{
        // Long memory form: matches (LoadL(src) + minus_1) & LoadL(src) and
        // emits one blsrq with a memory source. Enabled only under
        // UseBMI1Instructions; the flags register is declared killed.
10480   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
10481   predicate(UseBMI1Instructions);
10482   effect(KILL cr);
10483 
10484   ins_cost(125);
10485   format %{ "blsrq  $dst, $src" %}
10486 
10487   ins_encode %{
10488     __ blsrq($dst$$Register, $src$$Address);
10489   %}
10490 
        // Memory source operand: use the register-memory pipeline class, as
        // the other memory-form BMI1 rules (blsiq, blsmskq) do.
10491   ins_pipe(ialu_reg_mem);
10492 %}
10493 
10494 // Or Instructions
10495 // Or Register with Register
      // Two-operand long OR: dst is both an input and the result; the flags
      // register is declared killed.
10496 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10497 %{
10498   match(Set dst (OrL dst src));
10499   effect(KILL cr);
10500 
10501   format %{ "orq     $dst, $src\t# long" %}
10502   ins_encode %{
10503     __ orq($dst$$Register, $src$$Register);
10504   %}
10505   ins_pipe(ialu_reg_reg);
10506 %}
10507 
10508 // Use any_RegP to match R15 (TLS register) without spilling.
      // ORs a pointer register into a long: the CastP2X is folded away and
      // the pointer register is used directly as the orq source.
10509 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10510   match(Set dst (OrL dst (CastP2X src)));
10511   effect(KILL cr);
10512 
10513   format %{ "orq     $dst, $src\t# long" %}
10514   ins_encode %{
10515     __ orq($dst$$Register, $src$$Register);
10516   %}
10517   ins_pipe(ialu_reg_reg);
10518 %}
10519 
10520 
10521 // Or Register with Immediate
      // Two-operand long OR of dst with an immL32 constant; dst is updated in
      // place and the flags register is declared killed.
10522 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10523 %{
10524   match(Set dst (OrL dst src));
10525   effect(KILL cr);
10526 
10527   format %{ "orq     $dst, $src\t# long" %}
10528   ins_encode %{
10529     __ orq($dst$$Register, $src$$constant);
10530   %}
10531   ins_pipe(ialu_reg);
10532 %}
10533 
10534 // Or Register with Memory
      // Folds a LoadL into the OR as a memory source operand; dst is updated
      // in place and the flags register is declared killed.
10535 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10536 %{
10537   match(Set dst (OrL dst (LoadL src)));
10538   effect(KILL cr);
10539 
10540   ins_cost(150);
10541   format %{ "orq     $dst, $src\t# long" %}
10542   ins_encode %{
10543     __ orq($dst$$Register, $src$$Address);
10544   %}
10545   ins_pipe(ialu_reg_mem);
10546 %}
10547 
10548 // Or Memory with Register
      // Long read-modify-write: matches a StoreL to dst of (LoadL dst) | src
      // and emits a single memory-destination orq.
10549 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10550 %{
10551   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10552   effect(KILL cr);
10553 
10554   ins_cost(150);
10555   format %{ "orq     $dst, $src\t# long" %}
10556   ins_encode %{
10557     __ orq($dst$$Address, $src$$Register);
10558   %}
10559   ins_pipe(ialu_mem_reg);
10560 %}
10561 
10562 // Or Memory with Immediate
      // Long read-modify-write with an immL32 constant: same tree shape as
      // the register variant, distinguished by operand type and a lower
      // cost (125).
10563 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10564 %{
10565   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10566   effect(KILL cr);
10567 
10568   ins_cost(125);
10569   format %{ "orq     $dst, $src\t# long" %}
10570   ins_encode %{
10571     __ orq($dst$$Address, $src$$constant);
10572   %}
10573   ins_pipe(ialu_mem_imm);
10574 %}
10575 
10576 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
10577 %{
        // Sets a single bit of a long in memory: an OR with a power-of-two
        // constant is emitted as btsq on that bit.
10578   // con should be a pure 64-bit power of 2 immediate
10579   // because AND/OR works well enough for 8/32-bit values.
        // The rule applies only when the bit index log2(con) is above 31.
        // NOTE(review): n->in(3)->in(2) is presumably the constant input of
        // the stored OrL value - confirm against the StoreL node layout.
10580   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
10581 
10582   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
10583   effect(KILL cr);
10584 
10585   ins_cost(125);
10586   format %{ "btsq    $dst, log2($con)\t# long" %}
10587   ins_encode %{
10588     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
10589   %}
10590   ins_pipe(ialu_mem_imm);
10591 %}
10592 
10593 // Xor Instructions
10594 // Xor Register with Register
      // Two-operand long XOR: dst is both an input and the result; the flags
      // register is declared killed.
10595 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10596 %{
10597   match(Set dst (XorL dst src));
10598   effect(KILL cr);
10599 
10600   format %{ "xorq    $dst, $src\t# long" %}
10601   ins_encode %{
10602     __ xorq($dst$$Register, $src$$Register);
10603   %}
10604   ins_pipe(ialu_reg_reg);
10605 %}
10606 
10607 // Xor Register with Immediate -1
      // Emitted as notq on dst instead of an XOR; unlike the other XOR rules,
      // no flags operand is declared.
10608 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10609   match(Set dst (XorL dst imm));
10610 
10611   format %{ "notq   $dst" %}
10612   ins_encode %{
10613      __ notq($dst$$Register);
10614   %}
10615   ins_pipe(ialu_reg);
10616 %}
10617 
10618 // Xor Register with Immediate
      // Two-operand long XOR of dst with an immL32 constant; dst is updated
      // in place and the flags register is declared killed.
10619 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10620 %{
10621   match(Set dst (XorL dst src));
10622   effect(KILL cr);
10623 
10624   format %{ "xorq    $dst, $src\t# long" %}
10625   ins_encode %{
10626     __ xorq($dst$$Register, $src$$constant);
10627   %}
10628   ins_pipe(ialu_reg);
10629 %}
10630 
10631 // Xor Register with Memory
      // Folds a LoadL into the XOR as a memory source operand; dst is updated
      // in place and the flags register is declared killed.
10632 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10633 %{
10634   match(Set dst (XorL dst (LoadL src)));
10635   effect(KILL cr);
10636 
10637   ins_cost(150);
10638   format %{ "xorq    $dst, $src\t# long" %}
10639   ins_encode %{
10640     __ xorq($dst$$Register, $src$$Address);
10641   %}
10642   ins_pipe(ialu_reg_mem);
10643 %}
10644 
10645 // Xor Memory with Register
      // Long read-modify-write: matches a StoreL to dst of (LoadL dst) ^ src
      // and emits a single memory-destination xorq.
10646 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10647 %{
10648   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10649   effect(KILL cr);
10650 
10651   ins_cost(150);
10652   format %{ "xorq    $dst, $src\t# long" %}
10653   ins_encode %{
10654     __ xorq($dst$$Address, $src$$Register);
10655   %}
10656   ins_pipe(ialu_mem_reg);
10657 %}
10658 
10659 // Xor Memory with Immediate
      // Long read-modify-write with an immL32 constant: same tree shape as
      // the register variant, distinguished by operand type and a lower
      // cost (125).
10660 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10661 %{
10662   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10663   effect(KILL cr);
10664 
10665   ins_cost(125);
10666   format %{ "xorq    $dst, $src\t# long" %}
10667   ins_encode %{
10668     __ xorq($dst$$Address, $src$$constant);
10669   %}
10670   ins_pipe(ialu_mem_imm);
10671 %}
10672 
10673 // Convert Int to Boolean
// Matches Conv2B on an int register. The emitted sequence tests src against
// itself, sets the low byte of dst when the result is non-zero, and then
// zero-extends that byte so dst holds 0 or 1.
10674 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10675 %{
10676   match(Set dst (Conv2B src));
10677   effect(KILL cr);
10678 
10679   format %{ "testl   $src, $src\t# ci2b\n\t"
10680             "setnz   $dst\n\t"
10681             "movzbl  $dst, $dst" %}
10682   ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
10683     __ testl(Rsrc, Rsrc);
10684     __ set_byte_if_not_zero(Rdst);
10685     __ movzbl(Rdst, Rdst);
10686   %}
10687   ins_pipe(pipe_slow); // XXX
10688 %}
10689 
10690 // Convert Pointer to Boolean
// Pointer flavour of Conv2B: same sequence as the int rule, but the test is
// the 64-bit testq. dst ends up 0 for a zero pointer value and 1 otherwise.
10691 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10692 %{
10693   match(Set dst (Conv2B src));
10694   effect(KILL cr);
10695 
10696   format %{ "testq   $src, $src\t# cp2b\n\t"
10697             "setnz   $dst\n\t"
10698             "movzbl  $dst, $dst" %}
10699   ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
10700     __ testq(Rsrc, Rsrc);
10701     __ set_byte_if_not_zero(Rdst);
10702     __ movzbl(Rdst, Rdst);
10703   %}
10704   ins_pipe(pipe_slow); // XXX
10705 %}
10706 
// General CmpLTMask: compares p with q, sets dst's low byte on signed
// "less", zero-extends it and negates, leaving dst = -1 when p < q and 0
// otherwise.
10707 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10708 %{
10709   match(Set dst (CmpLTMask p q));
10710   effect(KILL cr);
10711 
10712   ins_cost(400);
10713   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10714             "setlt   $dst\n\t"
10715             "movzbl  $dst, $dst\n\t"
10716             "negl    $dst" %}
10717   ins_encode %{
    Register Rmask = $dst$$Register;
10718     __ cmpl($p$$Register, $q$$Register);
10719     __ setl(Rmask);
10720     __ movzbl(Rmask, Rmask);
10721     __ negl(Rmask);
10722   %}
10723   ins_pipe(pipe_slow);
10724 %}
10725 
// CmpLTMask against the constant zero (immI_0), computed in place with one
// arithmetic right shift by 31, which replicates dst's sign bit into every
// bit. Costed at 100 versus 400 for the general cmpLTMask rule.
10726 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
10727 %{
10728   match(Set dst (CmpLTMask dst zero));
10729   effect(KILL cr);
10730 
10731   ins_cost(100);
10732   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10733   ins_encode %{
10734     __ sarl($dst$$Register, 31);
10735   %}
10736   ins_pipe(ialu_reg);
10737 %}
10738 
10739 // Better to save a register than avoid a branch.
// Fuses p = ((CmpLTMask p q) & y) + (p - q) into: subtract q from p, and if
// the signed result is negative ("less"), add y back in. No mask register is
// materialized; a short forward branch is used instead.
10740 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10741 %{
10742   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10743   effect(KILL cr);
10744   ins_cost(300);
10745   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
10746             "jge     done\n\t"
10747             "addl    $p,$y\n"
10748             "done:   " %}
10749   ins_encode %{
10753     Label skip_add;
10754     __ subl($p$$Register, $q$$Register);
10755     __ jccb(Assembler::greaterEqual, skip_add);
10756     __ addl($p$$Register, $y$$Register);
10757     __ bind(skip_add);
10758   %}
10759   ins_pipe(pipe_cmplt);
10760 %}
10761 
10762 // Better to save a register than avoid a branch.
// Fuses y = (CmpLTMask p q) & y into: compare p with q and, unless p < q
// (signed), clear y. y is left untouched on the "less" path.
10763 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10764 %{
10765   match(Set y (AndI (CmpLTMask p q) y));
10766   effect(KILL cr);
10767 
10768   ins_cost(300);
10769 
10770   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
10771             "jlt     done\n\t"
10772             "xorl    $y, $y\n"
10773             "done:   " %}
10774   ins_encode %{
10778     Label keep_y;
10779     __ cmpl($p$$Register, $q$$Register);
10780     __ jccb(Assembler::less, keep_y);
10781     __ xorl($y$$Register, $y$$Register);
10782     __ bind(keep_y);
10783   %}
10784   ins_pipe(pipe_cmplt);
10785 %}
10786 
10787 
10788 //---------- FP Instructions------------------------------------------------
10789 
10790 // Really expensive, avoid
// Float compare producing an rFlagsRegU result. After ucomiss it calls
// emit_cmpfp_fixup (defined outside this excerpt); per the format text that
// fixup is skipped when the parity flag is clear and otherwise rewrites the
// pushed flags word for the NaN case. Costed at 500, versus 100 for the
// rFlagsRegUCF variants, which emit no fixup.
10791 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10792 %{
10793   match(Set cr (CmpF src1 src2));
10794 
10795   ins_cost(500);
10796   format %{ "ucomiss $src1, $src2\n\t"
10797             "jnp,s   exit\n\t"
10798             "pushfq\t# saw NaN, set CF\n\t"
10799             "andq    [rsp], #0xffffff2b\n\t"
10800             "popfq\n"
10801     "exit:" %}
10802   ins_encode %{
10803     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10804     emit_cmpfp_fixup(_masm);
10805   %}
10806   ins_pipe(pipe_slow);
10807 %}
10808 
// Float compare into an rFlagsRegUCF result: a bare ucomiss with no flag
// fixup afterwards, costed at 100 (the rFlagsRegU rule cmpF_cc_reg is 500).
10809 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10810   match(Set cr (CmpF src1 src2));
10811 
10812   ins_cost(100);
10813   format %{ "ucomiss $src1, $src2" %}
10814   ins_encode %{
10815     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10816   %}
10817   ins_pipe(pipe_slow);
10818 %}
10819 
// Float compare into rFlagsRegUCF with the second operand read directly
// from memory: the LoadF is folded into ucomiss's memory operand.
10820 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10821   match(Set cr (CmpF src1 (LoadF src2)));
10822 
10823   ins_cost(100);
10824   format %{ "ucomiss $src1, $src2" %}
10825   ins_encode %{
10826     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10827   %}
10828   ins_pipe(pipe_slow);
10829 %}
10830 
// Float compare into rFlagsRegUCF against a float constant (immF); the
// constant is addressed through $constantaddress, i.e. read from the
// constant table as the format text states.
10831 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
10832   match(Set cr (CmpF src con));
10833   ins_cost(100);
10834   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10835   ins_encode %{
10836     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10837   %}
10838   ins_pipe(pipe_slow);
10839 %}
10840 
10841 // Really expensive, avoid
// Double compare producing an rFlagsRegU result. After ucomisd it calls
// emit_cmpfp_fixup (defined outside this excerpt); per the format text that
// fixup is skipped when the parity flag is clear and otherwise rewrites the
// pushed flags word for the NaN case. Costed at 500, versus 100 for the
// rFlagsRegUCF variants, which emit no fixup.
10842 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10843 %{
10844   match(Set cr (CmpD src1 src2));
10845 
10846   ins_cost(500);
10847   format %{ "ucomisd $src1, $src2\n\t"
10848             "jnp,s   exit\n\t"
10849             "pushfq\t# saw NaN, set CF\n\t"
10850             "andq    [rsp], #0xffffff2b\n\t"
10851             "popfq\n"
10852     "exit:" %}
10853   ins_encode %{
10854     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10855     emit_cmpfp_fixup(_masm);
10856   %}
10857   ins_pipe(pipe_slow);
10858 %}
10859 
// Double compare into an rFlagsRegUCF result: a bare ucomisd with no flag
// fixup afterwards, costed at 100 (the rFlagsRegU rule cmpD_cc_reg is 500).
// The format string mirrors the float rule cmpF_cc_reg_CF; a stray " test"
// suffix that used to appear in the printed assembly has been removed.
10860 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10861   match(Set cr (CmpD src1 src2));
10862 
10863   ins_cost(100);
10864   format %{ "ucomisd $src1, $src2" %}
10865   ins_encode %{
10866     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10867   %}
10868   ins_pipe(pipe_slow);
10869 %}
10870 
// Double compare into rFlagsRegUCF with the second operand read directly
// from memory: the LoadD is folded into ucomisd's memory operand.
10871 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10872   match(Set cr (CmpD src1 (LoadD src2)));
10873 
10874   ins_cost(100);
10875   format %{ "ucomisd $src1, $src2" %}
10876   ins_encode %{
10877     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10878   %}
10879   ins_pipe(pipe_slow);
10880 %}
10881 
// Double compare into rFlagsRegUCF against a double constant (immD); the
// constant is addressed through $constantaddress, i.e. read from the
// constant table as the format text states.
10882 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
10883   match(Set cr (CmpD src con));
10884   ins_cost(100);
10885   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10886   ins_encode %{
10887     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10888   %}
10889   ins_pipe(pipe_slow);
10890 %}
10891 
10892 // Compare into -1,0,1
// Three-way float compare (CmpF3), register/register. ucomiss sets the flags
// and emit_cmpfp3 (defined outside this excerpt) turns them into an int in
// dst. Per the format text: dst is preset to -1 and kept on the parity
// (unordered) or below paths; otherwise it becomes 0 or 1 via setne/movzbl.
10893 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10894 %{
10895   match(Set dst (CmpF3 src1 src2));
10896   effect(KILL cr);
10897 
10898   ins_cost(275);
10899   format %{ "ucomiss $src1, $src2\n\t"
10900             "movl    $dst, #-1\n\t"
10901             "jp,s    done\n\t"
10902             "jb,s    done\n\t"
10903             "setne   $dst\n\t"
10904             "movzbl  $dst, $dst\n"
10905     "done:" %}
10906   ins_encode %{
10907     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10908     emit_cmpfp3(_masm, $dst$$Register);
10909   %}
10910   ins_pipe(pipe_slow);
10911 %}
10912 
10913 // Compare into -1,0,1
// Three-way float compare (CmpF3) with the second operand loaded from memory
// (LoadF folded into ucomiss). The flags-to-int step is emit_cmpfp3, defined
// outside this excerpt; the format text shows the intended sequence.
10914 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10915 %{
10916   match(Set dst (CmpF3 src1 (LoadF src2)));
10917   effect(KILL cr);
10918 
10919   ins_cost(275);
10920   format %{ "ucomiss $src1, $src2\n\t"
10921             "movl    $dst, #-1\n\t"
10922             "jp,s    done\n\t"
10923             "jb,s    done\n\t"
10924             "setne   $dst\n\t"
10925             "movzbl  $dst, $dst\n"
10926     "done:" %}
10927   ins_encode %{
10928     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10929     emit_cmpfp3(_masm, $dst$$Register);
10930   %}
10931   ins_pipe(pipe_slow);
10932 %}
10933 
10934 // Compare into -1,0,1
// Three-way float compare (CmpF3) against a float constant (immF) addressed
// through $constantaddress. The flags-to-int step is emit_cmpfp3, defined
// outside this excerpt; the format text shows the intended sequence.
10935 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
10936   match(Set dst (CmpF3 src con));
10937   effect(KILL cr);
10938 
10939   ins_cost(275);
10940   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10941             "movl    $dst, #-1\n\t"
10942             "jp,s    done\n\t"
10943             "jb,s    done\n\t"
10944             "setne   $dst\n\t"
10945             "movzbl  $dst, $dst\n"
10946     "done:" %}
10947   ins_encode %{
10948     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10949     emit_cmpfp3(_masm, $dst$$Register);
10950   %}
10951   ins_pipe(pipe_slow);
10952 %}
10953 
10954 // Compare into -1,0,1
// Three-way double compare (CmpD3), register/register. ucomisd sets the
// flags and emit_cmpfp3 (defined outside this excerpt) turns them into an
// int in dst. Per the format text: dst is preset to -1 and kept on the parity
// (unordered) or below paths; otherwise it becomes 0 or 1 via setne/movzbl.
10955 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10956 %{
10957   match(Set dst (CmpD3 src1 src2));
10958   effect(KILL cr);
10959 
10960   ins_cost(275);
10961   format %{ "ucomisd $src1, $src2\n\t"
10962             "movl    $dst, #-1\n\t"
10963             "jp,s    done\n\t"
10964             "jb,s    done\n\t"
10965             "setne   $dst\n\t"
10966             "movzbl  $dst, $dst\n"
10967     "done:" %}
10968   ins_encode %{
10969     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10970     emit_cmpfp3(_masm, $dst$$Register);
10971   %}
10972   ins_pipe(pipe_slow);
10973 %}
10974 
10975 // Compare into -1,0,1
// Three-way double compare (CmpD3) with the second operand loaded from
// memory (LoadD folded into ucomisd). The flags-to-int step is emit_cmpfp3,
// defined outside this excerpt; the format text shows the intended sequence.
10976 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10977 %{
10978   match(Set dst (CmpD3 src1 (LoadD src2)));
10979   effect(KILL cr);
10980 
10981   ins_cost(275);
10982   format %{ "ucomisd $src1, $src2\n\t"
10983             "movl    $dst, #-1\n\t"
10984             "jp,s    done\n\t"
10985             "jb,s    done\n\t"
10986             "setne   $dst\n\t"
10987             "movzbl  $dst, $dst\n"
10988     "done:" %}
10989   ins_encode %{
10990     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10991     emit_cmpfp3(_masm, $dst$$Register);
10992   %}
10993   ins_pipe(pipe_slow);
10994 %}
10995 
10996 // Compare into -1,0,1
// Three-way double compare (CmpD3) against a double constant (immD)
// addressed through $constantaddress. The flags-to-int step is emit_cmpfp3,
// defined outside this excerpt; the format text shows the intended sequence.
10997 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
10998   match(Set dst (CmpD3 src con));
10999   effect(KILL cr);
11000 
11001   ins_cost(275);
11002   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
11003             "movl    $dst, #-1\n\t"
11004             "jp,s    done\n\t"
11005             "jb,s    done\n\t"
11006             "setne   $dst\n\t"
11007             "movzbl  $dst, $dst\n"
11008     "done:" %}
11009   ins_encode %{
11010     __ ucomisd($src$$XMMRegister, $constantaddress($con));
11011     emit_cmpfp3(_masm, $dst$$Register);
11012   %}
11013   ins_pipe(pipe_slow);
11014 %}
11015 
11016 //----------Arithmetic Conversion Instructions---------------------------------
11017 
// Float-to-double conversion between XMM registers: matches ConvF2D and
// emits cvtss2sd. No flags operand is declared.
11018 instruct convF2D_reg_reg(regD dst, regF src)
11019 %{
11020   match(Set dst (ConvF2D src));
11021 
11022   format %{ "cvtss2sd $dst, $src" %}
11023   ins_encode %{
11024     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
11025   %}
11026   ins_pipe(pipe_slow); // XXX
11027 %}
11028 
// Float-to-double conversion with the float read straight from memory: the
// LoadF is folded into cvtss2sd's memory operand.
11029 instruct convF2D_reg_mem(regD dst, memory src)
11030 %{
11031   match(Set dst (ConvF2D (LoadF src)));
11032 
11033   format %{ "cvtss2sd $dst, $src" %}
11034   ins_encode %{
11035     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
11036   %}
11037   ins_pipe(pipe_slow); // XXX
11038 %}
11039 
// Double-to-float conversion between XMM registers: matches ConvD2F and
// emits cvtsd2ss. No flags operand is declared.
11040 instruct convD2F_reg_reg(regF dst, regD src)
11041 %{
11042   match(Set dst (ConvD2F src));
11043 
11044   format %{ "cvtsd2ss $dst, $src" %}
11045   ins_encode %{
11046     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
11047   %}
11048   ins_pipe(pipe_slow); // XXX
11049 %}
11050 
// Double-to-float conversion with the double read straight from memory: the
// LoadD is folded into cvtsd2ss's memory operand.
11051 instruct convD2F_reg_mem(regF dst, memory src)
11052 %{
11053   match(Set dst (ConvD2F (LoadD src)));
11054 
11055   format %{ "cvtsd2ss $dst, $src" %}
11056   ins_encode %{
11057     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
11058   %}
11059   ins_pipe(pipe_slow); // XXX
11060 %}
11061 
11062 // XXX do mem variants
// Float-to-int conversion (ConvF2I). The whole sequence is delegated to the
// macro-assembler helper convert_f2i, whose body is not visible in this
// excerpt; the rule declares the flags register killed.
11063 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11064 %{
11065   match(Set dst (ConvF2I src));
11066   effect(KILL cr);
11067   format %{ "convert_f2i $dst,$src" %}
11068   ins_encode %{
11069     __ convert_f2i($dst$$Register, $src$$XMMRegister);
11070   %}
11071   ins_pipe(pipe_slow);
11072 %}
11073 
// Float-to-long conversion (ConvF2L). The whole sequence is delegated to the
// macro-assembler helper convert_f2l, whose body is not visible in this
// excerpt; the rule declares the flags register killed.
11074 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11075 %{
11076   match(Set dst (ConvF2L src));
11077   effect(KILL cr);
11078   format %{ "convert_f2l $dst,$src"%}
11079   ins_encode %{
11080     __ convert_f2l($dst$$Register, $src$$XMMRegister);
11081   %}
11082   ins_pipe(pipe_slow);
11083 %}
11084 
// Double-to-int conversion (ConvD2I). The whole sequence is delegated to the
// macro-assembler helper convert_d2i, whose body is not visible in this
// excerpt; the rule declares the flags register killed.
11085 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11086 %{
11087   match(Set dst (ConvD2I src));
11088   effect(KILL cr);
11089   format %{ "convert_d2i $dst,$src"%}
11090   ins_encode %{
11091     __ convert_d2i($dst$$Register, $src$$XMMRegister);
11092   %}
11093   ins_pipe(pipe_slow);
11094 %}
11095 
// Double-to-long conversion (ConvD2L). The whole sequence is delegated to
// the macro-assembler helper convert_d2l, whose body is not visible in this
// excerpt; the rule declares the flags register killed.
11096 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11097 %{
11098   match(Set dst (ConvD2L src));
11099   effect(KILL cr);
11100   format %{ "convert_d2l $dst,$src"%}
11101   ins_encode %{
11102     __ convert_d2l($dst$$Register, $src$$XMMRegister);
11103   %}
11104   ins_pipe(pipe_slow);
11105 %}
11106 
// RoundD: double in, long out, implemented by the macro-assembler helper
// round_double (body not visible in this excerpt). dst, a scratch long
// register and an rcx-class register are all declared TEMP, and the flags
// register is killed.
11107 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
11108 %{
11109   match(Set dst (RoundD src));
11110   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
11111   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
11112   ins_encode %{
11113     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
11114   %}
11115   ins_pipe(pipe_slow);
11116 %}
11117 
// RoundF: float in, int out, implemented by the macro-assembler helper
// round_float (body not visible in this excerpt). Same TEMP/KILL set as
// round_double_reg; note the format string here does not list the temps.
11118 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
11119 %{
11120   match(Set dst (RoundF src));
11121   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
11122   format %{ "round_float $dst,$src" %}
11123   ins_encode %{
11124     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
11125   %}
11126   ins_pipe(pipe_slow);
11127 %}
11128 
// Int-to-float conversion from a general register via cvtsi2ssl. Selected
// only when UseXmmI2F is off; convXI2F_reg handles the same ConvI2F match
// when the flag is on.
11129 instruct convI2F_reg_reg(regF dst, rRegI src)
11130 %{
11131   predicate(!UseXmmI2F);
11132   match(Set dst (ConvI2F src));
11133 
11134   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11135   ins_encode %{
11136     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11137   %}
11138   ins_pipe(pipe_slow); // XXX
11139 %}
11140 
// Int-to-float conversion with the int read straight from memory: the LoadI
// is folded into cvtsi2ssl's memory operand. Unlike the register form, this
// rule carries no UseXmmI2F predicate.
11141 instruct convI2F_reg_mem(regF dst, memory src)
11142 %{
11143   match(Set dst (ConvI2F (LoadI src)));
11144 
11145   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11146   ins_encode %{
11147     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
11148   %}
11149   ins_pipe(pipe_slow); // XXX
11150 %}
11151 
// Int-to-double conversion from a general register via cvtsi2sdl. Selected
// only when UseXmmI2D is off; convXI2D_reg handles the same ConvI2D match
// when the flag is on.
11152 instruct convI2D_reg_reg(regD dst, rRegI src)
11153 %{
11154   predicate(!UseXmmI2D);
11155   match(Set dst (ConvI2D src));
11156 
11157   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11158   ins_encode %{
11159     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11160   %}
11161   ins_pipe(pipe_slow); // XXX
11162 %}
11163 
// Int-to-double conversion with the int read straight from memory: the
// LoadI is folded into cvtsi2sdl's memory operand. Unlike the register form,
// this rule carries no UseXmmI2D predicate.
11164 instruct convI2D_reg_mem(regD dst, memory src)
11165 %{
11166   match(Set dst (ConvI2D (LoadI src)));
11167 
11168   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11169   ins_encode %{
11170     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
11171   %}
11172   ins_pipe(pipe_slow); // XXX
11173 %}
11174 
// Alternative int-to-float path used when UseXmmI2F is on: the int is first
// copied into the destination XMM register with movdl and then converted in
// place with cvtdq2ps.
11175 instruct convXI2F_reg(regF dst, rRegI src)
11176 %{
11177   predicate(UseXmmI2F);
11178   match(Set dst (ConvI2F src));
11179 
11180   format %{ "movdl $dst, $src\n\t"
11181             "cvtdq2psl $dst, $dst\t# i2f" %}
11182   ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
11183     __ movdl(Xdst, $src$$Register);
11184     __ cvtdq2ps(Xdst, Xdst);
11185   %}
11186   ins_pipe(pipe_slow); // XXX
11187 %}
11188 
// Alternative int-to-double path used when UseXmmI2D is on: the int is first
// copied into the destination XMM register with movdl and then converted in
// place with cvtdq2pd.
11189 instruct convXI2D_reg(regD dst, rRegI src)
11190 %{
11191   predicate(UseXmmI2D);
11192   match(Set dst (ConvI2D src));
11193 
11194   format %{ "movdl $dst, $src\n\t"
11195             "cvtdq2pdl $dst, $dst\t# i2d" %}
11196   ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
11197     __ movdl(Xdst, $src$$Register);
11198     __ cvtdq2pd(Xdst, Xdst);
11199   %}
11200   ins_pipe(pipe_slow); // XXX
11201 %}
11202 
// Long-to-float conversion from a general register: matches ConvL2F and
// emits the 64-bit-source form cvtsi2ssq.
11203 instruct convL2F_reg_reg(regF dst, rRegL src)
11204 %{
11205   match(Set dst (ConvL2F src));
11206 
11207   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11208   ins_encode %{
11209     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
11210   %}
11211   ins_pipe(pipe_slow); // XXX
11212 %}
11213 
// Long-to-float conversion with the long read straight from memory: the
// LoadL is folded into cvtsi2ssq's memory operand.
11214 instruct convL2F_reg_mem(regF dst, memory src)
11215 %{
11216   match(Set dst (ConvL2F (LoadL src)));
11217 
11218   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11219   ins_encode %{
11220     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
11221   %}
11222   ins_pipe(pipe_slow); // XXX
11223 %}
11224 
// Long-to-double conversion from a general register: matches ConvL2D and
// emits the 64-bit-source form cvtsi2sdq.
11225 instruct convL2D_reg_reg(regD dst, rRegL src)
11226 %{
11227   match(Set dst (ConvL2D src));
11228 
11229   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11230   ins_encode %{
11231     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
11232   %}
11233   ins_pipe(pipe_slow); // XXX
11234 %}
11235 
// Long-to-double conversion with the long read straight from memory: the
// LoadL is folded into cvtsi2sdq's memory operand.
11236 instruct convL2D_reg_mem(regD dst, memory src)
11237 %{
11238   match(Set dst (ConvL2D (LoadL src)));
11239 
11240   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11241   ins_encode %{
11242     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
11243   %}
11244   ins_pipe(pipe_slow); // XXX
11245 %}
11246 
// Sign-extending int-to-long conversion: matches ConvI2L and emits movslq.
// Costed at 125; the zero-extending rules (convI2L_reg_reg_zex and friends)
// declare no ins_cost of their own.
11247 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11248 %{
11249   match(Set dst (ConvI2L src));
11250 
11251   ins_cost(125);
11252   format %{ "movslq  $dst, $src\t# i2l" %}
11253   ins_encode %{
11254     __ movslq($dst$$Register, $src$$Register);
11255   %}
11256   ins_pipe(ialu_reg_reg);
11257 %}
11258 
11259 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11260 // %{
11261 //   match(Set dst (ConvI2L src));
11262 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11263 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11264 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11265 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11266 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11267 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11268 
11269 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11270 //   ins_encode(enc_copy(dst, src));
11271 // //   opcode(0x63); // needs REX.W
11272 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11273 //   ins_pipe(ialu_reg_reg);
11274 // %}
11275 
11276 // Zero-extend convert int to long
// Matches (AndL (ConvI2L src) mask) where mask is the immL_32bits operand,
// i.e. an int widened to long and then masked to its low 32 bits. A 32-bit
// movl is emitted for this; when the allocator assigned dst and src the same
// register encoding, no instruction is emitted at all.
// NOTE(review): the dst == src shortcut presumably relies on src already
// having zero upper 32 bits - confirm.
// The format string no longer ends with a dangling "\n\t", which printed an
// empty indented line after the instruction.
11277 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11278 %{
11279   match(Set dst (AndL (ConvI2L src) mask));
11280 
11281   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
11282   ins_encode %{
11283     if ($dst$$reg != $src$$reg) {
11284       __ movl($dst$$Register, $src$$Register);
11285     }
11286   %}
11287   ins_pipe(ialu_reg_reg);
11288 %}
11289 
11290 // Zero-extend convert int to long
// Memory flavour: matches (AndL (ConvI2L (LoadI src)) mask) with the
// immL_32bits mask and emits one 32-bit movl load from the memory operand.
// The format string no longer ends with a dangling "\n\t", which printed an
// empty indented line after the instruction.
11291 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11292 %{
11293   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11294 
11295   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
11296   ins_encode %{
11297     __ movl($dst$$Register, $src$$Address);
11298   %}
11299   ins_pipe(ialu_reg_mem);
11300 %}
11301 
// Masking a long register with the immL_32bits operand is matched here and
// emitted as a 32-bit register move (movl) rather than an AND. Because no
// flag-writing instruction is used, no rFlagsReg operand is declared.
11302 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11303 %{
11304   match(Set dst (AndL src mask));
11305 
11306   format %{ "movl    $dst, $src\t# zero-extend long" %}
11307   ins_encode %{
11308     __ movl($dst$$Register, $src$$Register);
11309   %}
11310   ins_pipe(ialu_reg_reg);
11311 %}
11312 
// Long-to-int narrowing: matches ConvL2I and emits a 32-bit movl from the
// long source register into the int destination register.
11313 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11314 %{
11315   match(Set dst (ConvL2I src));
11316 
11317   format %{ "movl    $dst, $src\t# l2i" %}
11318   ins_encode %{
11319     __ movl($dst$$Register, $src$$Register);
11320   %}
11321   ins_pipe(ialu_reg_reg);
11322 %}
11323 
11324 
// MoveF2I, stack-slot source: loads the 32 bits of a float stack slot into
// an int register with a plain movl from [rsp + disp]. No conversion
// instruction is emitted.
11325 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11326   match(Set dst (MoveF2I src));
11327   effect(DEF dst, USE src);
11328 
11329   ins_cost(125);
11330   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11331   ins_encode %{
11332     __ movl($dst$$Register, Address(rsp, $src$$disp));
11333   %}
11334   ins_pipe(ialu_reg_mem);
11335 %}
11336 
// MoveI2F, stack-slot source: loads an int stack slot at [rsp + disp] into
// an XMM register through the macro-assembler's movflt. No conversion
// instruction is emitted.
11337 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11338   match(Set dst (MoveI2F src));
11339   effect(DEF dst, USE src);
11340 
11341   ins_cost(125);
11342   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11343   ins_encode %{
11344     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11345   %}
11346   ins_pipe(pipe_slow);
11347 %}
11348 
// MoveD2L, stack-slot source: loads the 64 bits of a double stack slot into
// a long register with a plain movq from [rsp + disp]. No conversion
// instruction is emitted.
11349 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11350   match(Set dst (MoveD2L src));
11351   effect(DEF dst, USE src);
11352 
11353   ins_cost(125);
11354   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11355   ins_encode %{
11356     __ movq($dst$$Register, Address(rsp, $src$$disp));
11357   %}
11358   ins_pipe(ialu_reg_mem);
11359 %}
11360 
// MoveL2D, stack-slot source, selected when UseXmmLoadAndClearUpper is off.
// The encoding calls the same movdbl helper as MoveL2D_stack_reg; only the
// predicate and the printed mnemonic (movlpd here, movsd there) differ.
// NOTE(review): presumably movdbl itself picks the instruction from the same
// flag - confirm against the macro assembler.
11361 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11362   predicate(!UseXmmLoadAndClearUpper);
11363   match(Set dst (MoveL2D src));
11364   effect(DEF dst, USE src);
11365 
11366   ins_cost(125);
11367   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11368   ins_encode %{
11369     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11370   %}
11371   ins_pipe(pipe_slow);
11372 %}
11373 
// MoveL2D, stack-slot source, selected when UseXmmLoadAndClearUpper is on.
// Loads the long stack slot at [rsp + disp] into an XMM register through the
// macro-assembler's movdbl; the format prints it as movsd.
11374 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11375   predicate(UseXmmLoadAndClearUpper);
11376   match(Set dst (MoveL2D src));
11377   effect(DEF dst, USE src);
11378 
11379   ins_cost(125);
11380   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11381   ins_encode %{
11382     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11383   %}
11384   ins_pipe(pipe_slow);
11385 %}
11386 
11387 
// MoveF2I, stack-slot destination: stores a float XMM register into an int
// stack slot at [rsp + disp] through the macro-assembler's movflt.
11388 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11389   match(Set dst (MoveF2I src));
11390   effect(DEF dst, USE src);
11391 
11392   ins_cost(95); // XXX
11393   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11394   ins_encode %{
11395     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11396   %}
11397   ins_pipe(pipe_slow);
11398 %}
11399 
// MoveI2F, stack-slot destination: stores an int register into a float stack
// slot at [rsp + disp] with a plain movl.
11400 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11401   match(Set dst (MoveI2F src));
11402   effect(DEF dst, USE src);
11403 
11404   ins_cost(100);
11405   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11406   ins_encode %{
11407     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11408   %}
11409   ins_pipe( ialu_mem_reg );
11410 %}
11411 
// MoveD2L, stack-slot destination: stores a double XMM register into a long
// stack slot at [rsp + disp] through the macro-assembler's movdbl.
// The format tag now names this rule (MoveD2L_reg_stack); it previously
// printed "MoveL2D_reg_stack", which is the name of a different rule.
11412 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11413   match(Set dst (MoveD2L src));
11414   effect(DEF dst, USE src);
11415 
11416   ins_cost(95); // XXX
11417   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11418   ins_encode %{
11419     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11420   %}
11421   ins_pipe(pipe_slow);
11422 %}
11423 
// MoveL2D, stack-slot destination: stores a long register into a double
// stack slot at [rsp + disp] with a plain movq.
11424 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11425   match(Set dst (MoveL2D src));
11426   effect(DEF dst, USE src);
11427 
11428   ins_cost(100);
11429   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11430   ins_encode %{
11431     __ movq(Address(rsp, $dst$$disp), $src$$Register);
11432   %}
11433   ins_pipe(ialu_mem_reg);
11434 %}
11435 
// MoveF2I, register to register: copies a float XMM register into an int
// general register with movdl. Costed at 85, below the stack-slot variants
// of the same match (95 and 125).
11436 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11437   match(Set dst (MoveF2I src));
11438   effect(DEF dst, USE src);
11439   ins_cost(85);
11440   format %{ "movd    $dst,$src\t# MoveF2I" %}
11441   ins_encode %{
11442     __ movdl($dst$$Register, $src$$XMMRegister);
11443   %}
11444   ins_pipe( pipe_slow );
11445 %}
11446 
// MoveD2L, register to register: copies a double XMM register into a long
// general register with movdq. Note the format prints "movd" although the
// encoding calls movdq. Costed at 85, below the stack-slot variants.
11447 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11448   match(Set dst (MoveD2L src));
11449   effect(DEF dst, USE src);
11450   ins_cost(85);
11451   format %{ "movd    $dst,$src\t# MoveD2L" %}
11452   ins_encode %{
11453     __ movdq($dst$$Register, $src$$XMMRegister);
11454   %}
11455   ins_pipe( pipe_slow );
11456 %}
11457 
// MoveI2F, register to register: copies an int general register into a
// float XMM register with movdl.
11458 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11459   match(Set dst (MoveI2F src));
11460   effect(DEF dst, USE src);
11461   ins_cost(100);
11462   format %{ "movd    $dst,$src\t# MoveI2F" %}
11463   ins_encode %{
11464     __ movdl($dst$$XMMRegister, $src$$Register);
11465   %}
11466   ins_pipe( pipe_slow );
11467 %}
11468 
// MoveL2D, register to register: copies a long general register into a
// double XMM register with movdq. Note the format prints "movd" although the
// encoding calls movdq.
11469 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11470   match(Set dst (MoveL2D src));
11471   effect(DEF dst, USE src);
11472   ins_cost(100);
11473   format %{ "movd    $dst,$src\t# MoveL2D" %}
11474   ins_encode %{
11475      __ movdq($dst$$XMMRegister, $src$$Register);
11476   %}
11477   ins_pipe( pipe_slow );
11478 %}
11479 
11480 
11481 // Fast clearing of an array
11482 // Small ClearArray non-AVX512.
// Selected for ClearArray nodes that are neither is_large() nor
// word_copy_only(), when UseAVX <= 2. cnt, base and val are pinned to the
// rcx, rdi and rax register classes and are all USE_KILL; an XMM temp and the
// flags register are clobbered as well. All code generation is delegated to
// MacroAssembler::clear_mem (body not visible in this excerpt); the format
// template below is only the printed description of what it is expected to
// emit for the current UseFastStosb / UseXMMForObjInit settings.
11483 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11484                   Universe dummy, rFlagsReg cr)
11485 %{
11486   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11487   match(Set dummy (ClearArray (Binary cnt base) val));
11488   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11489 
11490   format %{ $$template
11491     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11492     $$emit$$"jg      LARGE\n\t"
11493     $$emit$$"dec     rcx\n\t"
11494     $$emit$$"js      DONE\t# Zero length\n\t"
11495     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11496     $$emit$$"dec     rcx\n\t"
11497     $$emit$$"jge     LOOP\n\t"
11498     $$emit$$"jmp     DONE\n\t"
11499     $$emit$$"# LARGE:\n\t"
11500     if (UseFastStosb) {
11501        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11502        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11503     } else if (UseXMMForObjInit) {
11504        $$emit$$"movdq   $tmp, $val\n\t"
11505        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11506        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11507        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11508        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11509        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11510        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11511        $$emit$$"add     0x40,rax\n\t"
11512        $$emit$$"# L_zero_64_bytes:\n\t"
11513        $$emit$$"sub     0x8,rcx\n\t"
11514        $$emit$$"jge     L_loop\n\t"
11515        $$emit$$"add     0x4,rcx\n\t"
11516        $$emit$$"jl      L_tail\n\t"
11517        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11518        $$emit$$"add     0x20,rax\n\t"
11519        $$emit$$"sub     0x4,rcx\n\t"
11520        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11521        $$emit$$"add     0x4,rcx\n\t"
11522        $$emit$$"jle     L_end\n\t"
11523        $$emit$$"dec     rcx\n\t"
11524        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11525        $$emit$$"vmovq   xmm0,(rax)\n\t"
11526        $$emit$$"add     0x8,rax\n\t"
11527        $$emit$$"dec     rcx\n\t"
11528        $$emit$$"jge     L_sloop\n\t"
11529        $$emit$$"# L_end:\n\t"
11530     } else {
11531        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11532     }
11533     $$emit$$"# DONE"
11534   %}
11535   ins_encode %{
    // The two trailing booleans presumably correspond to is_large and
    // word_copy_only (both false here, mirroring the predicate) - confirm
    // against MacroAssembler::clear_mem.
11536     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11537                  $tmp$$XMMRegister, false, false);
11538   %}
11539   ins_pipe(pipe_slow);
11540 %}
11541 
// Small ClearArray, non-AVX512 (UseAVX <= 2), for nodes whose
// word_copy_only() is true. Same operands and effects as rep_stos; the
// differences are the predicate, the absence of the UseFastStosb (rep stosb)
// branch in the format template, and the last clear_mem argument being true.
// Code generation is delegated to MacroAssembler::clear_mem (body not
// visible in this excerpt).
11542 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11543                             Universe dummy, rFlagsReg cr)
11544 %{
11545   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11546   match(Set dummy (ClearArray (Binary cnt base) val));
11547   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11548 
11549   format %{ $$template
11550     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11551     $$emit$$"jg      LARGE\n\t"
11552     $$emit$$"dec     rcx\n\t"
11553     $$emit$$"js      DONE\t# Zero length\n\t"
11554     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11555     $$emit$$"dec     rcx\n\t"
11556     $$emit$$"jge     LOOP\n\t"
11557     $$emit$$"jmp     DONE\n\t"
11558     $$emit$$"# LARGE:\n\t"
11559     if (UseXMMForObjInit) {
11560        $$emit$$"movdq   $tmp, $val\n\t"
11561        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11562        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11563        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11564        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11565        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11566        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11567        $$emit$$"add     0x40,rax\n\t"
11568        $$emit$$"# L_zero_64_bytes:\n\t"
11569        $$emit$$"sub     0x8,rcx\n\t"
11570        $$emit$$"jge     L_loop\n\t"
11571        $$emit$$"add     0x4,rcx\n\t"
11572        $$emit$$"jl      L_tail\n\t"
11573        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11574        $$emit$$"add     0x20,rax\n\t"
11575        $$emit$$"sub     0x4,rcx\n\t"
11576        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11577        $$emit$$"add     0x4,rcx\n\t"
11578        $$emit$$"jle     L_end\n\t"
11579        $$emit$$"dec     rcx\n\t"
11580        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11581        $$emit$$"vmovq   xmm0,(rax)\n\t"
11582        $$emit$$"add     0x8,rax\n\t"
11583        $$emit$$"dec     rcx\n\t"
11584        $$emit$$"jge     L_sloop\n\t"
11585        $$emit$$"# L_end:\n\t"
11586     } else {
11587        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11588     }
11589     $$emit$$"# DONE"
11590   %}
11591   ins_encode %{
    // The two trailing booleans presumably correspond to is_large (false)
    // and word_copy_only (true), mirroring the predicate - confirm against
    // MacroAssembler::clear_mem.
11592     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11593                  $tmp$$XMMRegister, false, true);
11594   %}
11595   ins_pipe(pipe_slow);
11596 %}
11597 
11598 // Small ClearArray AVX512 non-constant length.
// Selected for ClearArray nodes that are neither is_large() nor
// word_copy_only(), when UseAVX > 2. Compared with rep_stos it uses a
// legRegD temp, adds a kReg mask temp that is passed to clear_mem as an extra
// argument, and declares ins_cost(125). Code generation is delegated to
// MacroAssembler::clear_mem (body not visible in this excerpt).
// NOTE(review): the format template starts with "xorq rax, rax" and uses
// fixed ymm0/xmm0 names, yet val is an input operand in the rax class and is
// passed to clear_mem; the non-EVEX rule's template has no such xorq line.
// The printed text looks stale relative to the encoding - confirm.
11599 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11600                        Universe dummy, rFlagsReg cr)
11601 %{
11602   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11603   match(Set dummy (ClearArray (Binary cnt base) val));
11604   ins_cost(125);
11605   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11606 
11607   format %{ $$template
11608     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11609     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11610     $$emit$$"jg      LARGE\n\t"
11611     $$emit$$"dec     rcx\n\t"
11612     $$emit$$"js      DONE\t# Zero length\n\t"
11613     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11614     $$emit$$"dec     rcx\n\t"
11615     $$emit$$"jge     LOOP\n\t"
11616     $$emit$$"jmp     DONE\n\t"
11617     $$emit$$"# LARGE:\n\t"
11618     if (UseFastStosb) {
11619        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11620        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11621     } else if (UseXMMForObjInit) {
11622        $$emit$$"mov     rdi,rax\n\t"
11623        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11624        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11625        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11626        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11627        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11628        $$emit$$"add     0x40,rax\n\t"
11629        $$emit$$"# L_zero_64_bytes:\n\t"
11630        $$emit$$"sub     0x8,rcx\n\t"
11631        $$emit$$"jge     L_loop\n\t"
11632        $$emit$$"add     0x4,rcx\n\t"
11633        $$emit$$"jl      L_tail\n\t"
11634        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11635        $$emit$$"add     0x20,rax\n\t"
11636        $$emit$$"sub     0x4,rcx\n\t"
11637        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11638        $$emit$$"add     0x4,rcx\n\t"
11639        $$emit$$"jle     L_end\n\t"
11640        $$emit$$"dec     rcx\n\t"
11641        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11642        $$emit$$"vmovq   xmm0,(rax)\n\t"
11643        $$emit$$"add     0x8,rax\n\t"
11644        $$emit$$"dec     rcx\n\t"
11645        $$emit$$"jge     L_sloop\n\t"
11646        $$emit$$"# L_end:\n\t"
11647     } else {
11648        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11649     }
11650     $$emit$$"# DONE"
11651   %}
11652   ins_encode %{
    // The two booleans presumably correspond to is_large and word_copy_only
    // (both false here, mirroring the predicate); the final argument is the
    // kReg mask temp - confirm against MacroAssembler::clear_mem.
11653     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11654                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
11655   %}
11656   ins_pipe(pipe_slow);
11657 %}
11658 
// Small ClearArray AVX512 non-constant length, word-copy-only variant.
//
// Same operands and effects as rep_stos_evex, but selected when the
// ClearArray node reports word_copy_only() (still not is_large(), UseAVX > 2).
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Textual listing of the expected code shape; the instruction bytes come
  // from ins_encode below.
  // NOTE(review): the listing still shows a UseFastStosb (byte store) branch,
  // which rep_stos_large_word_copy omits - confirm it applies to this variant.
  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Booleans follow the predicate: false (not large), true (word-copy-only);
    // presumably clear_mem's is_large / word_copy_only flags - confirm.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11718 
// Large ClearArray non-AVX512.
//
// Matches a ClearArray node that is is_large() and not word_copy_only() when
// UseAVX <= 2. cnt (rcx_RegL), base (rdi_RegP) and val (rax_RegL) are all
// USE_KILL, the flags are killed, and one XMM temp is reserved. No opmask
// temp is used by this variant and no ins_cost is specified.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Textual listing of the expected code shape, chosen by the same flags
  // (UseFastStosb, UseXMMForObjInit) at print time; the instruction bytes
  // come from ins_encode below.
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Booleans follow the predicate: true (large), false (not word-copy-only);
    // presumably clear_mem's is_large / word_copy_only flags - confirm.
    // No opmask register argument is supplied here.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
11768 
// Large ClearArray non-AVX512, word-copy-only variant.
//
// Same operands and effects as rep_stos_large, but selected when the
// ClearArray node is is_large() and word_copy_only() with UseAVX <= 2.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  // Textual listing of the expected code shape. Unlike rep_stos_large there
  // is no UseFastStosb (byte store) branch in this listing. The instruction
  // bytes come from ins_encode below.
  format %{ $$template
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Booleans follow the predicate: true (large), true (word-copy-only);
    // presumably clear_mem's is_large / word_copy_only flags - confirm.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
11814 
// Large ClearArray AVX512.
//
// Matches a ClearArray node that is is_large() and not word_copy_only() when
// UseAVX > 2. cnt (rcx_RegL), base (rdi_RegP) and val (rax_RegL) are all
// USE_KILL, the flags are killed, and an XMM temp plus an opmask temp (ktmp)
// are reserved for the clear_mem call.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Textual listing of the expected code shape; the instruction bytes come
  // from ins_encode below.
  // NOTE(review): two branches print an xorq of rax although val (rax) is an
  // input of the match rule, and rep_stos_large prints no such line - confirm
  // the listing still mirrors what clear_mem emits.
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Booleans follow the predicate: true (large), false (not word-copy-only);
    // presumably clear_mem's is_large / word_copy_only flags - confirm. The
    // opmask temp is passed as the last argument.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11865 
// Large ClearArray AVX512, word-copy-only variant.
//
// Same operands and effects as rep_stos_large_evex, but selected when the
// ClearArray node is is_large() and word_copy_only() with UseAVX > 2.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  // Textual listing of the expected code shape; the instruction bytes come
  // from ins_encode below.
  // NOTE(review): the listing keeps the UseFastStosb branch and the xorq of
  // rax, neither of which appears in rep_stos_large_word_copy - confirm the
  // listing still mirrors what clear_mem emits for this variant.
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // Booleans follow the predicate: true (large), true (word-copy-only);
    // presumably clear_mem's is_large / word_copy_only flags - confirm. The
    // opmask temp is passed as the last argument.
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11915 
// Small ClearArray AVX512 constant length.
//
// Matches a ClearArray node whose count is an immL constant, when the node is
// neither is_large() nor word_copy_only(), UseAVX > 2 and
// VM_Version::supports_avx512vlbw() holds. Only val (rax_RegL) is USE_KILL;
// base is a plain rRegP with no kill effect declared. An XMM temp and an
// opmask temp are reserved and the flags are killed.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  // Lower than the ins_cost(125) of rep_stos_evex; presumably so that this
  // constant-length form wins when both rules match - confirm.
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
    // Constant-count form of clear_mem: the count is passed as an immediate
    // value rather than in a register.
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11930 
// String compare for StrComp nodes with encoding LL, used when
// VM_Version::supports_avx512vlbw() is false.
// Both string pointers and both counts are USE_KILL, the result is produced
// in a rax_RegI operand, one XMM temp is reserved and the flags are killed.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // knoreg is passed in the opmask slot because this variant reserves no
    // kReg temp.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11946 
// String compare for StrComp nodes with encoding LL, used when
// VM_Version::supports_avx512vlbw() is true.
// Same operands as string_compareL plus an opmask temp (ktmp) that is handed
// to string_compare.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // The reserved opmask temp takes the place of knoreg used by the
    // non-evex variant.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11962 
// String compare for StrComp nodes with encoding UU, used when
// VM_Version::supports_avx512vlbw() is false.
// Both string pointers and both counts are USE_KILL, the result is produced
// in a rax_RegI operand, one XMM temp is reserved and the flags are killed.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // knoreg is passed in the opmask slot because this variant reserves no
    // kReg temp.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11978 
// String compare for StrComp nodes with encoding UU, used when
// VM_Version::supports_avx512vlbw() is true.
// Same operands as string_compareU plus an opmask temp (ktmp) that is handed
// to string_compare.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // The reserved opmask temp takes the place of knoreg used by the
    // non-evex variant.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11994 
// String compare for StrComp nodes with encoding LU, used when
// VM_Version::supports_avx512vlbw() is false.
// Both string pointers and both counts are USE_KILL, the result is produced
// in a rax_RegI operand, one XMM temp is reserved and the flags are killed.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // knoreg is passed in the opmask slot because this variant reserves no
    // kReg temp.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12010 
// String compare for StrComp nodes with encoding LU, used when
// VM_Version::supports_avx512vlbw() is true.
// Same operands as string_compareLU plus an opmask temp (ktmp) that is handed
// to string_compare.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // The reserved opmask temp takes the place of knoreg used by the
    // non-evex variant.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12026 
// String compare for StrComp nodes with encoding UL, used when
// VM_Version::supports_avx512vlbw() is false.
// Unlike the LL/UU/LU variants, str1/cnt1 use the rsi/rdx operand classes and
// str2/cnt2 use rdi/rcx, and the call below passes str2/cnt2 ahead of
// str1/cnt1.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // Argument order is deliberately swapped (second string first) while the
    // encoding passed is still UL; knoreg fills the opmask slot.
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12042 
// String compare for StrComp nodes with encoding UL, used when
// VM_Version::supports_avx512vlbw() is true.
// As in string_compareUL, str1/cnt1 use the rsi/rdx operand classes and
// str2/cnt2 use rdi/rcx, and the call passes str2/cnt2 ahead of str1/cnt1.
// An opmask temp (ktmp) is reserved and handed to string_compare.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // Argument order is deliberately swapped (second string first) while the
    // encoding passed is still UL.
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12058 
// Fast substring search where the substring length is a constant known when
// the instruction is encoded (LL encoding).
// cnt2 (rax) and tmp (rcx) are not inputs of the match rule; they are scratch
// registers declared KILL and handed to the assembler routine.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    // Constant substring length taken from the immediate operand.
    const int sub_len = (int)$int_cnt2$$constant;

    // Operand registers, named once so both call sites read the same.
    Register    src_reg     = $str1$$Register;
    Register    sub_reg     = $str2$$Register;
    Register    src_cnt_reg = $cnt1$$Register;
    Register    sub_cnt_reg = $cnt2$$Register;
    Register    res_reg     = $result$$Register;
    XMMRegister vec_scratch = $tmp_vec$$XMMRegister;
    Register    gp_scratch  = $tmp$$Register;

    if (sub_len < 16) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(src_reg, sub_reg, src_cnt_reg, sub_cnt_reg,
                        sub_len, res_reg,
                        vec_scratch, gp_scratch, StrIntrinsicNode::LL);
    } else {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8(src_reg, sub_reg, src_cnt_reg, sub_cnt_reg,
                          sub_len, res_reg,
                          vec_scratch, gp_scratch, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
12087 
// Fast substring search where the substring length is a constant known when
// the instruction is encoded (UU encoding).
// cnt2 (rax) and tmp (rcx) are not inputs of the match rule; they are scratch
// registers declared KILL and handed to the assembler routine.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    // Constant substring length taken from the immediate operand.
    const int sub_len = (int)$int_cnt2$$constant;

    // Operand registers, named once so both call sites read the same.
    Register    src_reg     = $str1$$Register;
    Register    sub_reg     = $str2$$Register;
    Register    src_cnt_reg = $cnt1$$Register;
    Register    sub_cnt_reg = $cnt2$$Register;
    Register    res_reg     = $result$$Register;
    XMMRegister vec_scratch = $tmp_vec$$XMMRegister;
    Register    gp_scratch  = $tmp$$Register;

    if (sub_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(src_reg, sub_reg, src_cnt_reg, sub_cnt_reg,
                        sub_len, res_reg,
                        vec_scratch, gp_scratch, StrIntrinsicNode::UU);
    } else {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8(src_reg, sub_reg, src_cnt_reg, sub_cnt_reg,
                          sub_len, res_reg,
                          vec_scratch, gp_scratch, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
12116 
// Fast substring search where the substring length is a constant known when
// the instruction is encoded (UL encoding).
// cnt2 (rax) and tmp (rcx) are not inputs of the match rule; they are scratch
// registers declared KILL and handed to the assembler routine.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    // Constant substring length taken from the immediate operand.
    const int sub_len = (int)$int_cnt2$$constant;

    // Operand registers, named once so both call sites read the same.
    Register    src_reg     = $str1$$Register;
    Register    sub_reg     = $str2$$Register;
    Register    src_cnt_reg = $cnt1$$Register;
    Register    sub_cnt_reg = $cnt2$$Register;
    Register    res_reg     = $result$$Register;
    XMMRegister vec_scratch = $tmp_vec$$XMMRegister;
    Register    gp_scratch  = $tmp$$Register;

    if (sub_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(src_reg, sub_reg, src_cnt_reg, sub_cnt_reg,
                        sub_len, res_reg,
                        vec_scratch, gp_scratch, StrIntrinsicNode::UL);
    } else {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8(src_reg, sub_reg, src_cnt_reg, sub_cnt_reg,
                          sub_len, res_reg,
                          vec_scratch, gp_scratch, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
12145 
// Substring search for StrIndexOf nodes with encoding LL where the substring
// length is held in a register (cnt2, rax_RegI) rather than an immediate.
// Requires UseSSE42Intrinsics. Both strings and both counts are USE_KILL;
// tmp (rcx) and the flags are killed and one XMM temp is reserved.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    // (-1) sits in the argument slot where the constant-length rules pass
    // the immediate count; presumably it tells string_indexof that the
    // length is only known at run time - confirm against its declaration.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
12162 
// Substring search for StrIndexOf nodes with encoding UU where the substring
// length is held in a register (cnt2, rax_RegI) rather than an immediate.
// Requires UseSSE42Intrinsics. Both strings and both counts are USE_KILL;
// tmp (rcx) and the flags are killed and one XMM temp is reserved.
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    // (-1) sits in the argument slot where the constant-length rules pass
    // the immediate count; presumably it tells string_indexof that the
    // length is only known at run time - confirm against its declaration.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
12179 
// Substring search for StrIndexOf nodes with encoding UL where the substring
// length is held in a register (cnt2, rax_RegI) rather than an immediate.
// Requires UseSSE42Intrinsics. Both strings and both counts are USE_KILL;
// tmp (rcx) and the flags are killed and one XMM temp is reserved.
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    // (-1) sits in the argument slot where the constant-length rules pass
    // the immediate count; presumably it tells string_indexof that the
    // length is only known at run time - confirm against its declaration.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
12196 
// Single-character search for StrIndexOfChar nodes with encoding U.
// Requires UseSSE42Intrinsics. The string pointer, its count and the
// character are USE_KILL; three XMM temps and tmp (rcx, declared TEMP here)
// are reserved and the flags are killed.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    // All operands are forwarded unchanged to the assembler routine.
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12210 
// Single-character search for StrIndexOfChar nodes with encoding L.
// Requires UseSSE42Intrinsics. Operands and effects are the same as in
// string_indexof_char; only the predicate's encoding and the assembler
// routine called (stringL_indexof_char) differ.
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    // All operands are forwarded unchanged to the assembler routine.
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12224 
// fast string equals
//
// Matches StrEquals when VM_Version::supports_avx512vlbw() is false.
// Both string pointers and the count are USE_KILL, tmp3 (rbx) and the flags
// are killed, and two XMM temps are reserved.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // The leading false distinguishes this call from the AryEq rules, which
    // pass true; here the element count is supplied in cnt. knoreg fills the
    // opmask slot because no kReg temp is reserved.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12241 
// Fast string equals, used when VM_Version::supports_avx512vlbw() is true.
// Same operands as string_equals plus an opmask temp (ktmp) that is handed
// to arrays_equals.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                           legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // The leading false distinguishes this call from the AryEq rules, which
    // pass true; the reserved opmask temp replaces knoreg.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12257 
// fast array equals
//
// Matches AryEq nodes with encoding LL when
// VM_Version::supports_avx512vlbw() is false. Both array pointers are
// USE_KILL; tmp3 (rcx), tmp4 (rbx) and the flags are killed, and two XMM
// temps are reserved.
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // The leading true marks the array form. There is no count input, so the
    // scratch register tmp3 occupies the slot where string_equals passes its
    // count. knoreg fills the opmask slot.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12274 
// AryEq with LL encoding (byte[]) for CPUs with AVX-512 VL+BW; ktmp is
// reserved as a TEMP opmask register and forwarded to arrays_equals.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    lhs      = $ary1$$Register;
    const Register    rhs      = $ary2$$Register;
    const Register    answer   = $result$$Register;
    const Register    scratch1 = $tmp3$$Register;
    const Register    scratch2 = $tmp4$$Register;
    const XMMRegister vec_a    = $tmp1$$XMMRegister;
    const XMMRegister vec_b    = $tmp2$$XMMRegister;
    const KRegister   opmask   = $ktmp$$KRegister;
    __ arrays_equals(true, lhs, rhs, scratch1, answer, scratch2,
                     vec_a, vec_b, false /* char */, opmask);
  %}
  ins_pipe( pipe_slow );
%}
12290 
// AryEq with UU encoding (char[]) for CPUs without AVX-512 VL+BW; identical
// to the byte[] rule except that the "char" flag given to arrays_equals is true.
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    lhs      = $ary1$$Register;
    const Register    rhs      = $ary2$$Register;
    const Register    answer   = $result$$Register;
    const Register    scratch1 = $tmp3$$Register;
    const Register    scratch2 = $tmp4$$Register;
    const XMMRegister vec_a    = $tmp1$$XMMRegister;
    const XMMRegister vec_b    = $tmp2$$XMMRegister;
    __ arrays_equals(true, lhs, rhs, scratch1, answer, scratch2,
                     vec_a, vec_b, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12306 
// AryEq with UU encoding (char[]) for CPUs with AVX-512 VL+BW; ktmp is
// reserved as a TEMP opmask register and forwarded to arrays_equals.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const Register    lhs      = $ary1$$Register;
    const Register    rhs      = $ary2$$Register;
    const Register    answer   = $result$$Register;
    const Register    scratch1 = $tmp3$$Register;
    const Register    scratch2 = $tmp4$$Register;
    const XMMRegister vec_a    = $tmp1$$XMMRegister;
    const XMMRegister vec_b    = $tmp2$$XMMRegister;
    const KRegister   opmask   = $ktmp$$KRegister;
    __ arrays_equals(true, lhs, rhs, scratch1, answer, scratch2,
                     vec_a, vec_b, true /* char */, opmask);
  %}
  ins_pipe( pipe_slow );
%}
12322 
// CountPositives over a byte[] for CPUs lacking either AVX-512 VL+BW or BMI2.
// ary1 and len are consumed (USE_KILL); tmp3 and the flags are killed. Both
// opmask arguments of the macro-assembler routine are knoreg on this path.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12338 
// CountPositives over a byte[] for CPUs with both AVX-512 VL+BW and BMI2.
// Two TEMP opmask registers (ktmp1, ktmp2) are handed to the macro-assembler
// routine; ary1 and len are consumed, tmp3 and the flags are killed.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12354 
// fast char[] to byte[] compression
// StrCompressedCopy for CPUs lacking either AVX-512 VL+BW or BMI2; the two
// opmask arguments of char_array_compress are knoreg on this path.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    const Register    from    = $src$$Register;
    const Register    to      = $dst$$Register;
    const Register    count   = $len$$Register;
    const Register    scratch = $tmp5$$Register;
    const Register    answer  = $result$$Register;
    const XMMRegister vec_a   = $tmp1$$XMMRegister;
    const XMMRegister vec_b   = $tmp2$$XMMRegister;
    const XMMRegister vec_c   = $tmp3$$XMMRegister;
    const XMMRegister vec_d   = $tmp4$$XMMRegister;
    __ char_array_compress(from, to, count,
                           vec_a, vec_b, vec_c, vec_d,
                           scratch, answer,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12372 
// StrCompressedCopy for CPUs with both AVX-512 VL+BW and BMI2; two TEMP
// opmask registers are forwarded to char_array_compress.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    const Register    from    = $src$$Register;
    const Register    to      = $dst$$Register;
    const Register    count   = $len$$Register;
    const Register    scratch = $tmp5$$Register;
    const Register    answer  = $result$$Register;
    const XMMRegister vec_a   = $tmp1$$XMMRegister;
    const XMMRegister vec_b   = $tmp2$$XMMRegister;
    const XMMRegister vec_c   = $tmp3$$XMMRegister;
    const XMMRegister vec_d   = $tmp4$$XMMRegister;
    const KRegister   mask_a  = $ktmp1$$KRegister;
    const KRegister   mask_b  = $ktmp2$$KRegister;
    __ char_array_compress(from, to, count,
                           vec_a, vec_b, vec_c, vec_d,
                           scratch, answer,
                           mask_a, mask_b);
  %}
  ins_pipe( pipe_slow );
%}
// fast byte[] to char[] inflation
// StrInflatedCopy for CPUs lacking either AVX-512 VL+BW or BMI2. The node's
// result is the placeholder "dummy" operand; knoreg is passed as the opmask.
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    const Register    from    = $src$$Register;
    const Register    to      = $dst$$Register;
    const Register    count   = $len$$Register;
    const XMMRegister vec     = $tmp1$$XMMRegister;
    const Register    scratch = $tmp2$$Register;
    __ byte_array_inflate(from, to, count, vec, scratch, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12403 
// StrInflatedCopy for CPUs with both AVX-512 VL+BW and BMI2; a TEMP opmask
// register (ktmp) is forwarded to byte_array_inflate.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    const Register    from    = $src$$Register;
    const Register    to      = $dst$$Register;
    const Register    count   = $len$$Register;
    const XMMRegister vec     = $tmp1$$XMMRegister;
    const Register    scratch = $tmp2$$Register;
    const KRegister   opmask  = $ktmp$$KRegister;
    __ byte_array_inflate(from, to, count, vec, scratch, opmask);
  %}
  ins_pipe( pipe_slow );
%}
12417 
// encode char[] to byte[] in ISO_8859_1
// Chosen when the EncodeISOArray node is not flagged as ASCII; the final
// boolean argument of the shared encode_iso_array routine is therefore false.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    const Register    from    = $src$$Register;
    const Register    to      = $dst$$Register;
    const Register    count   = $len$$Register;
    const Register    scratch = $tmp5$$Register;
    const Register    answer  = $result$$Register;
    const XMMRegister vec_a   = $tmp1$$XMMRegister;
    const XMMRegister vec_b   = $tmp2$$XMMRegister;
    const XMMRegister vec_c   = $tmp3$$XMMRegister;
    const XMMRegister vec_d   = $tmp4$$XMMRegister;
    __ encode_iso_array(from, to, count,
                        vec_a, vec_b, vec_c, vec_d,
                        scratch, answer, false);
  %}
  ins_pipe( pipe_slow );
%}
12434 
// encode char[] to byte[] in ASCII
// Chosen when the EncodeISOArray node is flagged as ASCII; reuses the
// encode_iso_array routine with its final boolean argument set to true.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    const Register    from    = $src$$Register;
    const Register    to      = $dst$$Register;
    const Register    count   = $len$$Register;
    const Register    scratch = $tmp5$$Register;
    const Register    answer  = $result$$Register;
    const XMMRegister vec_a   = $tmp1$$XMMRegister;
    const XMMRegister vec_b   = $tmp2$$XMMRegister;
    const XMMRegister vec_c   = $tmp3$$XMMRegister;
    const XMMRegister vec_d   = $tmp4$$XMMRegister;
    __ encode_iso_array(from, to, count,
                        vec_a, vec_b, vec_c, vec_d,
                        scratch, answer, true);
  %}
  ins_pipe( pipe_slow );
%}
12451 
12452 //----------Overflow Math Instructions-----------------------------------------
12453 
// OverflowAddI, register + register. The add is performed in place on op1
// (USE_KILL), and the flags it leaves behind are the value of this node.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register accum  = $op1$$Register;
    const Register addend = $op2$$Register;
    __ addl(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
12466 
// OverflowAddI, register + int immediate. op1 is overwritten by the add
// (USE_KILL); only the resulting flags are consumed.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) %{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register accum = $op1$$Register;
    __ addl(accum, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12479 
// OverflowAddL, register + register. 64-bit add in place on op1 (USE_KILL);
// the flags are the result.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register accum  = $op1$$Register;
    const Register addend = $op2$$Register;
    __ addq(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
12491 
// OverflowAddL, register + 32-bit-encodable long immediate (immL32).
// op1 is overwritten by the add (USE_KILL).
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) %{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register accum = $op1$$Register;
    __ addq(accum, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12503 
// OverflowSubI, register - register. Emitted as a cmpl, so neither input is
// modified and no effect clause is declared.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register minuend    = $op1$$Register;
    const Register subtrahend = $op2$$Register;
    __ cmpl(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
12514 
// OverflowSubI, register - int immediate, emitted as a cmpl against the
// constant.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register minuend = $op1$$Register;
    __ cmpl(minuend, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12525 
// OverflowSubL, register - register, emitted as a 64-bit cmpq.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register minuend    = $op1$$Register;
    const Register subtrahend = $op2$$Register;
    __ cmpq(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
12536 
// OverflowSubL, register - immL32 constant, emitted as a 64-bit cmpq.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register minuend = $op1$$Register;
    __ cmpq(minuend, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12547 
// OverflowSubI with a constant-zero left operand, i.e. an int negation.
// op2 is negated in place (USE_KILL) and the flags are the result.
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2) %{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl    $op2\t# overflow check int" %}
  ins_encode %{
    const Register value = $op2$$Register;
    __ negl(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12559 
// OverflowSubL with a constant-zero left operand, i.e. a long negation.
// op2 is negated in place (USE_KILL) and the flags are the result.
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) %{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq    $op2\t# overflow check long" %}
  ins_encode %{
    const Register value = $op2$$Register;
    __ negq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12571 
// OverflowMulI, register * register. The product overwrites op1 (USE_KILL);
// the flags left by imull are the result.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register product = $op1$$Register;
    const Register factor  = $op2$$Register;
    __ imull(product, factor);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12583 
// OverflowMulI, register * int immediate. Uses the three-operand imull so the
// product goes to a TEMP register and op1 is left intact (plain USE).
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp) %{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    const Register product = $tmp$$Register;
    const Register factor  = $op1$$Register;
    __ imull(product, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12595 
// OverflowMulL, register * register. The product overwrites op1 (USE_KILL);
// the flags left by imulq are the result.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register product = $op1$$Register;
    const Register factor  = $op2$$Register;
    __ imulq(product, factor);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12607 
// OverflowMulL, register * immL32 constant. Uses the three-operand imulq so
// the product goes to a TEMP register and op1 is left intact (plain USE).
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) %{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    const Register product = $tmp$$Register;
    const Register factor  = $op1$$Register;
    __ imulq(product, factor, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12619 
12620 
12621 //----------Control Flow Instructions------------------------------------------
12622 // Signed compare Instructions
12623 
// XXX more variants!!
// CmpI, register vs register: a plain cmpl whose flags define cr.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12636 
// CmpI, register vs int immediate.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12647 
// CmpI, register vs int loaded from memory; the LoadI is folded into the
// cmpl's memory operand.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12659 
// CmpI of a register against constant zero, emitted as testl reg, reg
// instead of a compare with an immediate.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpI src zero));

  format %{ "testl   $src, $src" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12670 
// (src & con) compared against zero, folded into a single testl with an
// immediate mask.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testl(value, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12681 
// (src & LoadI mem) compared against zero, folded into a single testl with a
// memory operand.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero) %{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testl(value, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12692 
// Unsigned compare Instructions; really, same as signed except they
// produce an rFlagsRegU instead of rFlagsReg.
// CmpU, register vs register.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12705 
// CmpU, register vs int immediate; flags are typed as unsigned (rFlagsRegU).
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12716 
// CmpU, register vs int loaded from memory; the LoadI is folded into the
// cmpl's memory operand.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12728 
12729 // // // Cisc-spilled version of cmpU_rReg
12730 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
12731 // //%{
12732 // //  match(Set cr (CmpU (LoadI op1) op2));
12733 // //
12734 // //  format %{ "CMPu   $op1,$op2" %}
12735 // //  ins_cost(500);
12736 // //  opcode(0x39);  /* Opcode 39 /r */
12737 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12738 // //%}
12739 
// CmpU of a register against constant zero, emitted as testl reg, reg.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpU src zero));

  format %{ "testl   $src, $src\t# unsigned" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12750 
// CmpP, pointer register vs pointer register: a 64-bit cmpq producing
// unsigned-typed flags.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12761 
// CmpP, pointer register vs pointer loaded from memory. Only applies when
// the LoadP input carries no barrier data.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12774 
12775 // // // Cisc-spilled version of cmpP_rReg
12776 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12777 // //%{
12778 // //  match(Set cr (CmpP (LoadP op1) op2));
12779 // //
12780 // //  format %{ "CMPu   $op1,$op2" %}
12781 // //  ins_cost(500);
12782 // //  opcode(0x39);  /* Opcode 39 /r */
12783 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12784 // //%}
12785 
// XXX this is generalized by compP_rReg_mem???
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate requires the loaded address type to need no relocation and
// the load to carry no barrier data.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12802 
// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
// CmpP of a pointer register against null, emitted as testq reg, reg.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) %{
  match(Set cr (CmpP src zero));

  format %{ "testq   $src, $src\t# ptr" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12815 
// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
// Null check of a pointer loaded from memory, used when compressed oops are
// off or the compressed-oop base is non-null, and the load has no barrier data.
// NOTE(review): the format prints a 64-bit all-ones mask while the encoder is
// given 0xFFFFFFFF; presumably testq sign-extends its imm32 - confirm.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) %{
  predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    const Address slot = $op$$Address;
    __ testq(slot, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12831 
// Null check of a pointer loaded from memory when compressed oops are on with
// a null base: per the format string R12 (heap base) is then zero, so it is
// compared directly against the memory operand.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    const Address slot = $mem$$Address;
    __ cmpq(r12, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12844 
// CmpN, narrow-oop register vs narrow-oop register: a 32-bit cmpl.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12853 
// CmpN, narrow-oop register vs narrow oop loaded from memory.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  ins_encode %{
    const Register lhs = $src$$Register;
    __ cmpl(lhs, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12864 
// CmpN, narrow-oop register vs narrow-oop constant; delegated to
// cmp_narrow_oop with the constant passed as a jobject.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmp_narrow_oop(lhs, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12874 
// CmpN, narrow-oop constant vs narrow oop loaded from memory; delegated to
// the memory-operand form of cmp_narrow_oop.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    const Address slot = $mem$$Address;
    __ cmp_narrow_oop(slot, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12885 
// CmpN, narrow register vs narrow-klass constant; delegated to
// cmp_narrow_klass with the constant passed as a Klass*.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmp_narrow_klass(lhs, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12895 
// CmpN, narrow-klass constant vs narrow klass loaded from memory; delegated
// to the memory-operand form of cmp_narrow_klass.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    const Address slot = $mem$$Address;
    __ cmp_narrow_klass(slot, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12906 
// CmpN of a narrow-oop register against narrow null, emitted as
// testl reg, reg.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12914 
// Null check of a narrow oop loaded from memory, used when the compressed-oop
// base is non-null (so R12 cannot serve as a zero register; see testN_mem_reg0).
// The memory word is compared against 0, which sets ZF exactly when the narrow
// oop is null. The previous encoding compared against 0xFFFFFFFF (-1), which
// contradicted the "testl mem, 0xffffffff" format and would report "equal"
// only for an all-ones narrow oop instead of for null.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "cmpl    $mem, 0\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, 0);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12927 
// Null check of a narrow oop loaded from memory when the compressed-oop base
// is null: per the format string R12 (heap base) is then zero, so it is
// compared directly against the memory operand.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  predicate(CompressedOops::base() == NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    const Address slot = $mem$$Address;
    __ cmpl(r12, slot);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12939 
12940 // Yanked all unsigned pointer compare operations.
12941 // Pointer compares are done with CmpP which is already unsigned.
12942 
// CmpL, long register vs long register: a 64-bit cmpq.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12953 
// CmpL, long register vs immL32 constant.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12964 
// CmpL, long register vs long loaded from memory; the LoadL is folded into
// the cmpq's memory operand.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) %{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12975 
// CmpL of a long register against constant zero, emitted as testq reg, reg.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12986 
// (src & con) compared against zero for longs, folded into one testq with an
// immL32 mask.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) %{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testq(value, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12997 
// (src & LoadL mem) compared against zero, folded into one testq with a
// memory operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testq(value, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
13008 
// Same shape as testL_reg_mem, but the register side is a pointer viewed as
// a long through CastP2X; the cast itself emits no code here.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testq(value, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
13019 
// Manifest a CmpU3 result in an integer register.  Very painful.
// This is the test to avoid.
// Sequence: compare, preload dst with -1, and keep it if src1 is below src2
// (unsigned). Otherwise setne/movzbl turn dst into 1 (not equal) or 0 (equal).
// The movl does not disturb the flags set by the cmpl, so the jccb and setne
// both observe the compare result.
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setne($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
13045 
// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Sequence: signed 64-bit compare, preload dst with -1 and keep it when
// src1 is less than src2; otherwise setne/movzbl yield 1 or 0.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags) %{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    const Register out = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    Label finished;
    __ cmpq(lhs, rhs);
    __ movl(out, -1);
    __ jccb(Assembler::less, finished);
    __ setne(out);
    __ movzbl(out, out);
    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
13071 
// Manifest a CmpUL3 result in an integer register.  Very painful.
// This is the test to avoid.
// Sequence: 64-bit compare, preload dst with -1, and keep it if src1 is below
// src2 (unsigned). Otherwise setne/movzbl turn dst into 1 (not equal) or
// 0 (equal). The movl does not disturb the flags set by the cmpq.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setne($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
13097 
// Unsigned long compare Instructions; really, same as signed long except they
// produce an rFlagsRegU instead of rFlagsReg.
// CmpUL, long register vs long register.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    const Register rhs = $op2$$Register;
    __ cmpq(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
13110 
// CmpUL, long register vs immL32 constant; flags typed as unsigned.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
13121 
// CmpUL, long register vs long loaded from memory; the LoadL is folded into
// the cmpq's memory operand.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2) %{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    const Register lhs = $op1$$Register;
    __ cmpq(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
13132 
// CmpUL of a long register against constant zero, emitted as testq reg, reg.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ testq(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
13143 
// Compare a byte loaded from memory (LoadB) against an immI8 constant.
// The load is folded into the instruction: a single cmpb on the memory
// operand is emitted and the result is the flags operand cr.
13144 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
13145 %{
13146   match(Set cr (CmpI (LoadB mem) imm));
13147
13148   ins_cost(125);
13149   format %{ "cmpb    $mem, $imm" %}
13150   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
13151   ins_pipe(ialu_cr_reg_mem);
13152 %}
13153 
// Test an unsigned byte in memory against a mask: matches
// (CmpI (AndI (LoadUB mem) imm) zero) with an immU7 mask and emits one testb
// on the memory operand.  The zero operand is matched but not encoded.
13154 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
13155 %{
13156   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
13157
13158   ins_cost(125);
13159   format %{ "testb   $mem, $imm\t# ubyte" %}
13160   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
13161   ins_pipe(ialu_cr_reg_mem);
13162 %}
13163 
// Test a signed byte in memory against a mask: matches
// (CmpI (AndI (LoadB mem) imm) zero) with an immI8 mask and emits one testb
// on the memory operand.  The zero operand is matched but not encoded.
13164 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
13165 %{
13166   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
13167
13168   ins_cost(125);
13169   format %{ "testb   $mem, $imm\t# byte" %}
13170   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
13171   ins_pipe(ialu_cr_reg_mem);
13172 %}
13173 
13174 //----------Max and Min--------------------------------------------------------
13175 // Min Instructions
13176 
// Conditional move used to build integer min: dst = src when the flags in cr
// satisfy the "greater" condition.  There is no match rule here, only effects,
// so this instruction is reached through an expand (minI_rReg uses it).
13177 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
13178 %{
13179   effect(USE_DEF dst, USE src, USE cr);
13180
13181   format %{ "cmovlgt $dst, $src\t# min" %}
13182   ins_encode %{
13183     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
13184   %}
13185   ins_pipe(pipe_cmov_reg);
13186 %}
13187 
13188 
// Integer minimum: dst = MinI(dst, src).  Expands into two instructions that
// share a temporary flags operand: compI_rReg(cr, dst, src), then
// cmovI_reg_g(dst, src, cr), which replaces dst with src on "greater".
// compI_rReg is defined outside this view - presumably a compare of dst
// against src; confirm there.
13189 instruct minI_rReg(rRegI dst, rRegI src)
13190 %{
13191   match(Set dst (MinI dst src));
13192
13193   ins_cost(200);
13194   expand %{
13195     rFlagsReg cr;
13196     compI_rReg(cr, dst, src);
13197     cmovI_reg_g(dst, src, cr);
13198   %}
13199 %}
13200 
// Conditional move used to build integer max: dst = src when the flags in cr
// satisfy the "less" condition.  There is no match rule here, only effects,
// so this instruction is reached through an expand (maxI_rReg uses it).
13201 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
13202 %{
13203   effect(USE_DEF dst, USE src, USE cr);
13204
13205   format %{ "cmovllt $dst, $src\t# max" %}
13206   ins_encode %{
13207     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
13208   %}
13209   ins_pipe(pipe_cmov_reg);
13210 %}
13211 
13212 
// Integer maximum: dst = MaxI(dst, src).  Expands into two instructions that
// share a temporary flags operand: compI_rReg(cr, dst, src), then
// cmovI_reg_l(dst, src, cr), which replaces dst with src on "less".
// compI_rReg is defined outside this view - presumably a compare of dst
// against src; confirm there.
13213 instruct maxI_rReg(rRegI dst, rRegI src)
13214 %{
13215   match(Set dst (MaxI dst src));
13216
13217   ins_cost(200);
13218   expand %{
13219     rFlagsReg cr;
13220     compI_rReg(cr, dst, src);
13221     cmovI_reg_l(dst, src, cr);
13222   %}
13223 %}
13224 
13225 // ============================================================================
13226 // Branch Instructions
13227 
13228 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional branch for a Goto node.  The jmp is emitted with its second
// argument false, i.e. always in the long form, and the instruction declares
// a fixed size of 5.  jmpDir_short matches the same rule with a short jump.
13229 instruct jmpDir(label labl)
13230 %{
13231   match(Goto);
13232   effect(USE labl);
13233
13234   ins_cost(300);
13235   format %{ "jmp     $labl" %}
13236   size(5);
13237   ins_encode %{
13238     Label* L = $labl$$label;
13239     __ jmp(*L, false); // Always long jump
13240   %}
13241   ins_pipe(pipe_jmp);
13242 %}
13243 
13244 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch for an If node whose condition is a cmpOp on an
// rFlagsReg.  The condition code comes from $cop$$cmpcode; jcc is always
// emitted in the long form and the instruction declares a fixed size of 6.
13245 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
13246 %{
13247   match(If cop cr);
13248   effect(USE labl);
13249
13250   ins_cost(300);
13251   format %{ "j$cop     $labl" %}
13252   size(6);
13253   ins_encode %{
13254     Label* L = $labl$$label;
13255     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13256   %}
13257   ins_pipe(pipe_jcc);
13258 %}
13259 
13260 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loop back-branch: same encoding as jmpCon (long jcc, declared size 6) but
// matched against a CountedLoopEnd node instead of an If node.
13261 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
13262 %{
13263   match(CountedLoopEnd cop cr);
13264   effect(USE labl);
13265
13266   ins_cost(300);
13267   format %{ "j$cop     $labl\t# loop end" %}
13268   size(6);
13269   ins_encode %{
13270     Label* L = $labl$$label;
13271     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13272   %}
13273   ins_pipe(pipe_jcc);
13274 %}
13275 
13276 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// CountedLoopEnd branch for the unsigned flavour: condition operand cmpOpU on
// an rFlagsRegU.  Emits a long jcc with the condition from $cop$$cmpcode;
// declared size is 6.
13277 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13278   match(CountedLoopEnd cop cmp);
13279   effect(USE labl);
13280
13281   ins_cost(300);
13282   format %{ "j$cop,u   $labl\t# loop end" %}
13283   size(6);
13284   ins_encode %{
13285     Label* L = $labl$$label;
13286     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13287   %}
13288   ins_pipe(pipe_jcc);
13289 %}
13290 
// CountedLoopEnd branch for the cmpOpUCF / rFlagsRegUCF operand pair.  The
// encoding is the same long jcc as jmpLoopEndU, but the cost is 200 rather
// than 300.
13291 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
13292   match(CountedLoopEnd cop cmp);
13293   effect(USE labl);
13294
13295   ins_cost(200);
13296   format %{ "j$cop,u   $labl\t# loop end" %}
13297   size(6);
13298   ins_encode %{
13299     Label* L = $labl$$label;
13300     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13301   %}
13302   ins_pipe(pipe_jcc);
13303 %}
13304 
13305 // Jump Direct Conditional - using unsigned comparison
// If-branch on a cmpOpU condition over an rFlagsRegU.  Emits a long jcc with
// the condition from $cop$$cmpcode; declared size is 6.
13306 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13307   match(If cop cmp);
13308   effect(USE labl);
13309
13310   ins_cost(300);
13311   format %{ "j$cop,u   $labl" %}
13312   size(6);
13313   ins_encode %{
13314     Label* L = $labl$$label;
13315     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13316   %}
13317   ins_pipe(pipe_jcc);
13318 %}
13319 
// If-branch for the cmpOpUCF / rFlagsRegUCF operand pair.  The encoding is
// the same long jcc as jmpConU, but the cost is 200 rather than 300.
13320 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
13321   match(If cop cmp);
13322   effect(USE labl);
13323
13324   ins_cost(200);
13325   format %{ "j$cop,u   $labl" %}
13326   size(6);
13327   ins_encode %{
13328     Label* L = $labl$$label;
13329     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13330   %}
13331   ins_pipe(pipe_jcc);
13332 %}
13333 
// If-branch for a cmpOpUCF2 condition on rFlagsRegUCF.  Two jumps are emitted
// because the parity flag is examined together with the condition:
//   notEqual: branch to labl on parity OR on notEqual
//   equal:    skip the branch on parity, otherwise branch to labl on equal
// Any other condition code is rejected with ShouldNotReachHere().  No size()
// is declared for this long variant.
13334 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
13335   match(If cop cmp);
13336   effect(USE labl);
13337
13338   ins_cost(200);
13339   format %{ $$template
13340     if ($cop$$cmpcode == Assembler::notEqual) {
13341       $$emit$$"jp,u    $labl\n\t"
13342       $$emit$$"j$cop,u   $labl"
13343     } else {
13344       $$emit$$"jp,u    done\n\t"
13345       $$emit$$"j$cop,u   $labl\n\t"
13346       $$emit$$"done:"
13347     }
13348   %}
13349   ins_encode %{
13350     Label* l = $labl$$label;
13351     if ($cop$$cmpcode == Assembler::notEqual) {
          // Both jumps target labl and both use the long form.
13352       __ jcc(Assembler::parity, *l, false);
13353       __ jcc(Assembler::notEqual, *l, false);
13354     } else if ($cop$$cmpcode == Assembler::equal) {
          // Parity set: hop over the conditional jump with a short branch.
13355       Label done;
13356       __ jccb(Assembler::parity, done);
13357       __ jcc(Assembler::equal, *l, false);
13358       __ bind(done);
13359     } else {
13360        ShouldNotReachHere();
13361     }
13362   %}
13363   ins_pipe(pipe_jcc);
13364 %}
13365 
13366 // ============================================================================
13367 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
13368 // superklass array for an instance of the superklass.  Set a hidden
13369 // internal cache on a hit (cache is checked with exposed code in
13370 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13371 // encoding ALSO sets flags.
13372 
// Value-producing form of the partial subtype check: the result is delivered
// in the rdi_RegP operand.  sub and super are bound to the rsi_RegP and
// rax_RegP operand classes; rcx and the flags are killed.  The code itself is
// produced by enc_PartialSubtypeCheck (defined elsewhere in this file), which
// is parameterized through opcode(): 0x1 here requests the XOR of RDI on a hit.
13373 instruct partialSubtypeCheck(rdi_RegP result,
13374                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
13375                              rFlagsReg cr)
13376 %{
13377   match(Set result (PartialSubtypeCheck sub super));
13378   effect(KILL rcx, KILL cr);
13379
13380   ins_cost(1100);  // slightly larger than the next version
13381   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
13382             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
13383             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
13384             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
13385             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
13386             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
13387             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
13388     "miss:\t" %}
13389
13390   opcode(0x1); // Force a XOR of RDI
13391   ins_encode(enc_PartialSubtypeCheck());
13392   ins_pipe(pipe_slow);
13393 %}
13394 
// Flags-producing form of the partial subtype check: matches the check
// compared against the null pointer constant (immP0) and delivers only the
// flags in cr.  rdi (result) is not an output here, so it is declared KILLed
// along with rcx.  The shared enc_PartialSubtypeCheck encoding is used with
// opcode(0x0), so the XOR of RDI is left out.  The cost (1000) is lower than
// the value-producing form's 1100.
13395 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
13396                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
13397                                      immP0 zero,
13398                                      rdi_RegP result)
13399 %{
13400   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
13401   effect(KILL rcx, KILL result);
13402
13403   ins_cost(1000);
13404   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
13405             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
13406             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
13407             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
13408             "jne,s   miss\t\t# Missed: flags nz\n\t"
13409             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
13410     "miss:\t" %}
13411
13412   opcode(0x0); // No need to XOR RDI
13413   ins_encode(enc_PartialSubtypeCheck());
13414   ins_pipe(pipe_slow);
13415 %}
13416 
13417 // ============================================================================
13418 // Branch Instructions -- short offset versions
13419 //
13420 // These instructions are used to replace jumps of a long offset (the default
13421 // match) with jumps of a shorter offset.  These instructions are all tagged
13422 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13423 // match rules in general matching.  Instead, the ADLC generates a conversion
13424 // method in the MachNode which can be used to do in-place replacement of the
13425 // long variant with the shorter variant.  The compiler will determine if a
13426 // branch can be taken by the is_short_branch_offset() predicate in the machine
13427 // specific code section of the file.
13428 
13429 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: same Goto match rule, but emitted with
// jmpb and a declared size of 2.  ins_short_branch(1) tags it as a short
// branch variant.
13430 instruct jmpDir_short(label labl) %{
13431   match(Goto);
13432   effect(USE labl);
13433
13434   ins_cost(300);
13435   format %{ "jmp,s   $labl" %}
13436   size(2);
13437   ins_encode %{
13438     Label* L = $labl$$label;
13439     __ jmpb(*L);
13440   %}
13441   ins_pipe(pipe_jmp);
13442   ins_short_branch(1);
13443 %}
13444 
13445 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: same If match rule on cmpOp /
// rFlagsReg, emitted with jccb and a declared size of 2.
13446 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
13447   match(If cop cr);
13448   effect(USE labl);
13449
13450   ins_cost(300);
13451   format %{ "j$cop,s   $labl" %}
13452   size(2);
13453   ins_encode %{
13454     Label* L = $labl$$label;
13455     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13456   %}
13457   ins_pipe(pipe_jcc);
13458   ins_short_branch(1);
13459 %}
13460 
13461 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd: same CountedLoopEnd match rule,
// emitted with jccb and a declared size of 2.
13462 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
13463   match(CountedLoopEnd cop cr);
13464   effect(USE labl);
13465
13466   ins_cost(300);
13467   format %{ "j$cop,s   $labl\t# loop end" %}
13468   size(2);
13469   ins_encode %{
13470     Label* L = $labl$$label;
13471     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13472   %}
13473   ins_pipe(pipe_jcc);
13474   ins_short_branch(1);
13475 %}
13476 
13477 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEndU: CountedLoopEnd on cmpOpU /
// rFlagsRegU, emitted with jccb and a declared size of 2.
13478 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13479   match(CountedLoopEnd cop cmp);
13480   effect(USE labl);
13481
13482   ins_cost(300);
13483   format %{ "j$cop,us  $labl\t# loop end" %}
13484   size(2);
13485   ins_encode %{
13486     Label* L = $labl$$label;
13487     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13488   %}
13489   ins_pipe(pipe_jcc);
13490   ins_short_branch(1);
13491 %}
13492 
// Short-offset replacement for jmpLoopEndUCF: CountedLoopEnd on cmpOpUCF /
// rFlagsRegUCF, emitted with jccb and a declared size of 2.
// NOTE(review): the cost here is 300 while the long jmpLoopEndUCF uses 200 -
// confirm whether the difference is intended.
13493 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
13494   match(CountedLoopEnd cop cmp);
13495   effect(USE labl);
13496
13497   ins_cost(300);
13498   format %{ "j$cop,us  $labl\t# loop end" %}
13499   size(2);
13500   ins_encode %{
13501     Label* L = $labl$$label;
13502     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13503   %}
13504   ins_pipe(pipe_jcc);
13505   ins_short_branch(1);
13506 %}
13507 
13508 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU: If on cmpOpU / rFlagsRegU, emitted
// with jccb and a declared size of 2.
13509 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13510   match(If cop cmp);
13511   effect(USE labl);
13512
13513   ins_cost(300);
13514   format %{ "j$cop,us  $labl" %}
13515   size(2);
13516   ins_encode %{
13517     Label* L = $labl$$label;
13518     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13519   %}
13520   ins_pipe(pipe_jcc);
13521   ins_short_branch(1);
13522 %}
13523 
// Short-offset replacement for jmpConUCF: If on cmpOpUCF / rFlagsRegUCF,
// emitted with jccb and a declared size of 2.
// NOTE(review): the cost here is 300 while the long jmpConUCF uses 200 -
// confirm whether the difference is intended.
13524 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
13525   match(If cop cmp);
13526   effect(USE labl);
13527
13528   ins_cost(300);
13529   format %{ "j$cop,us  $labl" %}
13530   size(2);
13531   ins_encode %{
13532     Label* L = $labl$$label;
13533     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13534   %}
13535   ins_pipe(pipe_jcc);
13536   ins_short_branch(1);
13537 %}
13538 
// Short-offset replacement for jmpConUCF2.  The same two-jump scheme is used
// (parity is examined together with the condition), but every jump is a jccb,
// so the declared size is 4:
//   notEqual: branch to labl on parity OR on notEqual
//   equal:    skip the branch on parity, otherwise branch to labl on equal
// Any other condition code is rejected with ShouldNotReachHere().
13539 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
13540   match(If cop cmp);
13541   effect(USE labl);
13542
13543   ins_cost(300);
13544   format %{ $$template
13545     if ($cop$$cmpcode == Assembler::notEqual) {
13546       $$emit$$"jp,u,s  $labl\n\t"
13547       $$emit$$"j$cop,u,s  $labl"
13548     } else {
13549       $$emit$$"jp,u,s  done\n\t"
13550       $$emit$$"j$cop,u,s  $labl\n\t"
13551       $$emit$$"done:"
13552     }
13553   %}
13554   size(4);
13555   ins_encode %{
13556     Label* l = $labl$$label;
13557     if ($cop$$cmpcode == Assembler::notEqual) {
13558       __ jccb(Assembler::parity, *l);
13559       __ jccb(Assembler::notEqual, *l);
13560     } else if ($cop$$cmpcode == Assembler::equal) {
          // Parity set: hop over the conditional jump.
13561       Label done;
13562       __ jccb(Assembler::parity, done);
13563       __ jccb(Assembler::equal, *l);
13564       __ bind(done);
13565     } else {
13566        ShouldNotReachHere();
13567     }
13568   %}
13569   ins_pipe(pipe_jcc);
13570   ins_short_branch(1);
13571 %}
13572 
13573 // ============================================================================
13574 // inlined locking and unlocking
13575 
// Inlined FastLock, selected when the current compilation uses RTM
// (predicate Compile::current()->use_rtm()).  The result is the flags operand
// cr.  box is used and then killed (USE_KILL); tmp, scr, cx1 and cx2 are
// temporaries.  fast_lock() is called with the RTM counters, the method's
// MethodData, use_rtm = true and the compilation's profile_rtm() setting.
13576 instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
13577   predicate(Compile::current()->use_rtm());
13578   match(Set cr (FastLock object box));
13579   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13580   ins_cost(300);
13581   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13582   ins_encode %{
13583     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13584                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13585                  _rtm_counters, _stack_rtm_counters,
13586                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13587                  true, ra_->C->profile_rtm());
13588   %}
13589   ins_pipe(pipe_slow);
13590 %}
13591 
// Inlined FastLock for compilations that do not use RTM (predicate
// !Compile::current()->use_rtm()).  The result is the flags operand cr.
// box is used and then killed; tmp, scr and cx1 are temporaries.  fast_lock()
// is called with noreg for the second scratch register, NULL for the three
// RTM-related pointers and false for both RTM flags.
13592 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr, rRegP cx1) %{
13593   predicate(!Compile::current()->use_rtm());
13594   match(Set cr (FastLock object box));
13595   effect(TEMP tmp, TEMP scr, TEMP cx1, USE_KILL box);
13596   ins_cost(300);
13597   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
13598   ins_encode %{
13599     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13600                  $scr$$Register, $cx1$$Register, noreg, NULL, NULL, NULL, false, false);
13601   %}
13602   ins_pipe(pipe_slow);
13603 %}
13604 
// Inlined FastUnlock.  The result is the flags operand cr.  box (bound to the
// rax_RegP operand class) is used and then killed; tmp is a temporary.  One
// rule covers both modes: the compilation's use_rtm() value is passed
// straight through to fast_unlock().
13605 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
13606   match(Set cr (FastUnlock object box));
13607   effect(TEMP tmp, USE_KILL box);
13608   ins_cost(300);
13609   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
13610   ins_encode %{
13611     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13612   %}
13613   ins_pipe(pipe_slow);
13614 %}
13615 
13616 
13617 // ============================================================================
13618 // Safepoint Instructions
// Safepoint poll through the address held in the poll register.  A poll_type
// relocation is recorded at the instruction, then testl rax, [poll] is
// emitted; the flags are killed.  The assert re-decodes the emitted
// instruction and checks that it is recognized as a safepoint poll.
13619 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
13620 %{
13621   match(SafePoint poll);
13622   effect(KILL cr, USE poll);
13623
13624   format %{ "testl   rax, [$poll]\t"
13625             "# Safepoint: poll for GC" %}
13626   ins_cost(125);
13627   size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13628   ins_encode %{
13629     __ relocate(relocInfo::poll_type);
        // Remember where the poll instruction starts so the assert can inspect it.
13630     address pre_pc = __ pc();
13631     __ testl(rax, Address($poll$$Register, 0));
13632     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
13633   %}
13634   ins_pipe(ialu_reg_mem);
13635 %}
13636 
// MaskAll with a long source register into a kReg mask register.  The mask
// length is taken from Matcher::vector_length(this) and the work is delegated
// to the vector_maskall_operation() macro-assembler routine.
13637 instruct mask_all_evexL(kReg dst, rRegL src) %{
13638   match(Set dst (MaskAll src));
13639   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
13640   ins_encode %{
13641     int mask_len = Matcher::vector_length(this);
13642     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13643   %}
13644   ins_pipe( pipe_slow );
13645 %}
13646 
// MaskAll with an int source register, restricted by the predicate to vector
// lengths greater than 32.  The int is first sign-extended into the long
// temporary tmp (movslq), which is then passed to vector_maskall_operation()
// in the same way as in mask_all_evexL.
13647 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
13648   predicate(Matcher::vector_length(n) > 32);
13649   match(Set dst (MaskAll src));
13650   effect(TEMP tmp);
13651   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
13652   ins_encode %{
13653     int mask_len = Matcher::vector_length(this);
13654     __ movslq($tmp$$Register, $src$$Register);
13655     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
13656   %}
13657   ins_pipe( pipe_slow );
13658 %}
13659 
13660 // ============================================================================
13661 // Procedure Call/Return Instructions
13662 // Call Java Static Instruction
13663 // Note: If this code changes, the corresponding ret_addr_offset() and
13664 //       compute_padding() functions will have to be adjusted.
// Matches CallStaticJava.  The encoding is a chain of three encode classes
// defined elsewhere in this file: clear_avx, Java_Static_Call(meth) and
// call_epilog.  The opcode is 0xE8 and the instruction requests alignment 4.
13665 instruct CallStaticJavaDirect(method meth) %{
13666   match(CallStaticJava);
13667   effect(USE meth);
13668
13669   ins_cost(300);
13670   format %{ "call,static " %}
13671   opcode(0xE8); /* E8 cd */
13672   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
13673   ins_pipe(pipe_slow);
13674   ins_alignment(4);
13675 %}
13676 
13677 // Call Java Dynamic Instruction
13678 // Note: If this code changes, the corresponding ret_addr_offset() and
13679 //       compute_padding() functions will have to be adjusted.
// Matches CallDynamicJava.  The encoding chains clear_avx,
// Java_Dynamic_Call(meth) and call_epilog (encode classes defined elsewhere in
// this file).  Per the format string, rax is loaded with
// Universe::non_oop_word() before the call.  Alignment 4 is requested.
13680 instruct CallDynamicJavaDirect(method meth)
13681 %{
13682   match(CallDynamicJava);
13683   effect(USE meth);
13684
13685   ins_cost(300);
13686   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
13687             "call,dynamic " %}
13688   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
13689   ins_pipe(pipe_slow);
13690   ins_alignment(4);
13691 %}
13692 
13693 // Call Runtime Instruction
// Matches CallRuntime.  The encoding chains clear_avx and
// Java_To_Runtime(meth); unlike the Java call rules, no call_epilog and no
// alignment are specified.
13694 instruct CallRuntimeDirect(method meth)
13695 %{
13696   match(CallRuntime);
13697   effect(USE meth);
13698
13699   ins_cost(300);
13700   format %{ "call,runtime " %}
13701   ins_encode(clear_avx, Java_To_Runtime(meth));
13702   ins_pipe(pipe_slow);
13703 %}
13704 
13705 // Call runtime without safepoint
// Matches CallLeaf.  Uses the same encoding as CallRuntimeDirect: clear_avx
// followed by Java_To_Runtime(meth).
13706 instruct CallLeafDirect(method meth)
13707 %{
13708   match(CallLeaf);
13709   effect(USE meth);
13710
13711   ins_cost(300);
13712   format %{ "call_leaf,runtime " %}
13713   ins_encode(clear_avx, Java_To_Runtime(meth));
13714   ins_pipe(pipe_slow);
13715 %}
13716 
13717 // Call runtime without safepoint and with vector arguments
// Matches CallLeafVector.  Only Java_To_Runtime(meth) is encoded; unlike the
// other runtime call rules, clear_avx is not part of the encoding here
// (presumably because the vector arguments must survive - confirm against
// the clear_avx encode class).
13718 instruct CallLeafDirectVector(method meth)
13719 %{
13720   match(CallLeafVector);
13721   effect(USE meth);
13722
13723   ins_cost(300);
13724   format %{ "call_leaf,vector " %}
13725   ins_encode(Java_To_Runtime(meth));
13726   ins_pipe(pipe_slow);
13727 %}
13728 
13729 // Call runtime without safepoint
13730 // entry point is null, target holds the address to call
// Indirect form of CallLeafNoFP: selected when the call node has no entry
// point (entry_point() == NULL), in which case the callee address is supplied
// in the target register and an indirect call through it is emitted.
13731 instruct CallLeafNoFPInDirect(rRegP target)
13732 %{
13733   predicate(n->as_Call()->entry_point() == NULL);
13734   match(CallLeafNoFP target);
13735
13736   ins_cost(300);
13737   format %{ "call_leaf_nofp,runtime indirect " %}
13738   ins_encode %{
13739      __ call($target$$Register);
13740   %}
13741
13742   ins_pipe(pipe_slow);
13743 %}
13744 
// Direct form of CallLeafNoFP: selected when the call node carries an entry
// point (entry_point() != NULL).  Encoded as clear_avx followed by
// Java_To_Runtime(meth), like CallLeafDirect.
13745 instruct CallLeafNoFPDirect(method meth)
13746 %{
13747   predicate(n->as_Call()->entry_point() != NULL);
13748   match(CallLeafNoFP);
13749   effect(USE meth);
13750
13751   ins_cost(300);
13752   format %{ "call_leaf_nofp,runtime " %}
13753   ins_encode(clear_avx, Java_To_Runtime(meth));
13754   ins_pipe(pipe_slow);
13755 %}
13756 
13757 // Return Instruction
13758 // Remove the return address & jump to it.
13759 // Notice: We always emit a nop after a ret to make sure there is room
13760 // for safepoint patching
// Matches the Return node and emits a plain ret with immediate 0.
// NOTE(review): the comment preceding this rule mentions a nop emitted after
// the ret, but the encoding visible here emits only ret(0) - confirm whether
// that remark is still accurate.
13761 instruct Ret()
13762 %{
13763   match(Return);
13764
13765   format %{ "ret" %}
13766   ins_encode %{
13767     __ ret(0);
13768   %}
13769   ins_pipe(pipe_jmp);
13770 %}
13771 
13772 // Tail Call; Jump from runtime stub to Java code.
13773 // Also known as an 'interprocedural jump'.
13774 // Target of jump will eventually return to caller.
13775 // TailJump below removes the return address.
// Matches TailCall and emits an indirect jmp through jump_target.
// method_ptr is bound to the rbx_RegP operand class and is not touched by
// the encoding; per the format string, rbx holds the method at the jump.
// jump_target uses the no_rbp_RegP operand class.
13776 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
13777 %{
13778   match(TailCall jump_target method_ptr);
13779
13780   ins_cost(300);
13781   format %{ "jmp     $jump_target\t# rbx holds method" %}
13782   ins_encode %{
13783     __ jmp($jump_target$$Register);
13784   %}
13785   ins_pipe(pipe_jmp);
13786 %}
13787 
13788 // Tail Jump; remove the return address; jump to target.
13789 // TailCall above leaves the return address around.
// Matches TailJump.  The return address is popped into rdx and then an
// indirect jmp through jump_target is emitted.  ex_oop is bound to the
// rax_RegP operand class and is not touched by the encoding.
// NOTE(review): rdx is overwritten by the pop but no effect() declares it -
// confirm this is safe for every user of this rule.
13790 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
13791 %{
13792   match(TailJump jump_target ex_oop);
13793
13794   ins_cost(300);
13795   format %{ "popq    rdx\t# pop return address\n\t"
13796             "jmp     $jump_target" %}
13797   ins_encode %{
13798     __ popq(as_Register(RDX_enc));
13799     __ jmp($jump_target$$Register);
13800   %}
13801   ins_pipe(pipe_jmp);
13802 %}
13803 
13804 // Create exception oop: created by stack-crawling runtime code.
13805 // Created exception is now available to this handler, and is setup
13806 // just prior to jumping to this handler.  No code emitted.
// Matches CreateEx with the result bound to the rax_RegP operand class.
// The encoding is empty and the declared size is 0: the rule only tells the
// register allocator where the exception oop lives.
13807 instruct CreateException(rax_RegP ex_oop)
13808 %{
13809   match(Set ex_oop (CreateEx));
13810
13811   size(0);
13812   // use the following format syntax
13813   format %{ "# exception oop is in rax; no code emitted" %}
13814   ins_encode();
13815   ins_pipe(empty);
13816 %}
13817 
13818 // Rethrow exception:
13819 // The exception oop will come in the first argument position.
13820 // Then JUMP (not call) to the rethrow stub code.
// Matches the Rethrow node.  The code is produced by the enc_rethrow encode
// class (defined elsewhere in this file); per the format string it is a jmp
// to the rethrow stub.
13821 instruct RethrowException()
13822 %{
13823   match(Rethrow);
13824
13825   // use the following format syntax
13826   format %{ "jmp     rethrow_stub" %}
13827   ins_encode(enc_rethrow);
13828   ins_pipe(pipe_jmp);
13829 %}
13830 
13831 // ============================================================================
13832 // This name is KNOWN by the ADLC and cannot be changed.
13833 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13834 // for this guy.
// Matches ThreadLocal with the result bound to the r15_RegP operand class.
// Nothing is emitted (empty encoding, declared size 0): per the format string
// the thread-local pointer already lives in R15.
13835 instruct tlsLoadP(r15_RegP dst) %{
13836   match(Set dst (ThreadLocal));
13837   effect(DEF dst);
13838
13839   size(0);
13840   format %{ "# TLS is in R15" %}
13841   ins_encode( /*empty encoding*/ );
13842   ins_pipe(ialu_reg_reg);
13843 %}
13844 
13845 
13846 //----------PEEPHOLE RULES-----------------------------------------------------
13847 // These must follow all instruction definitions as they use the names
13848 // defined in the instructions definitions.
13849 //
13850 // peeppredicate ( rule_predicate );
13851 // // the predicate that must hold; otherwise the peephole rule is ignored
13852 //
13853 // peepmatch ( root_instr_name [preceding_instruction]* );
13854 //
13855 // peepprocedure ( procedure_name );
13856 // // provide a procedure name to perform the optimization, the procedure should
13857 // // reside in the architecture dependent peephole file, the method has the
13858 // // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
13859 // // with the arguments being the basic block, the current node index inside the
13860 // // block, the register allocator, a function that returns a new node
13861 // // defined in peepreplace when invoked, and the rules of the nodes appearing
13862 // // in the corresponding peepmatch; the function returns true if successful,
13863 // // else false
13864 //
13865 // peepconstraint %{
13866 // (instruction_number.operand_name relational_op instruction_number.operand_name
13867 //  [, ...] );
13868 // // instruction numbers are zero-based using left to right order in peepmatch
13869 //
13870 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13871 // // provide an instruction_number.operand_name for each operand that appears
13872 // // in the replacement instruction's match rule
13873 //
13874 // ---------VM FLAGS---------------------------------------------------------
13875 //
13876 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13877 //
13878 // Each peephole rule is given an identifying number starting with zero and
13879 // increasing by one in the order seen by the parser.  An individual peephole
13880 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13881 // on the command-line.
13882 //
13883 // ---------CURRENT LIMITATIONS----------------------------------------------
13884 //
13885 // Only transformations inside a basic block (do we need more for peephole?)
13886 //
13887 // ---------EXAMPLE----------------------------------------------------------
13888 //
13889 // // pertinent parts of existing instructions in architecture description
13890 // instruct movI(rRegI dst, rRegI src)
13891 // %{
13892 //   match(Set dst (CopyI src));
13893 // %}
13894 //
13895 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
13896 // %{
13897 //   match(Set dst (AddI dst src));
13898 //   effect(KILL cr);
13899 // %}
13900 //
13901 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
13902 // %{
13903 //   match(Set dst (AddI dst src));
13904 // %}
13905 //
13906 // 1. Simple replacement
13907 // - Only match adjacent instructions in same basic block
13908 // - Only equality constraints
13909 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
13910 // - Only one replacement instruction
13911 //
13912 // // Change (inc mov) to lea
13913 // peephole %{
13914 //   // lea should only be emitted when beneficial
13915 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13916 //   // increment preceded by register-register move
13917 //   peepmatch ( incI_rReg movI );
13918 //   // require that the destination register of the increment
13919 //   // match the destination register of the move
13920 //   peepconstraint ( 0.dst == 1.dst );
13921 //   // construct a replacement instruction that sets
13922 //   // the destination to ( move's source register + one )
13923 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13924 // %}
13925 //
13926 // 2. Procedural replacement
13927 // - More flexible finding relevant nodes
13928 // - More flexible constraints
13929 // - More flexible transformations
13930 // - May utilise architecture-dependent API more effectively
13931 // - Currently only one replacement instruction due to adlc parsing capabilities
13932 //
13933 // // Change (inc mov) to lea
13934 // peephole %{
13935 //   // lea should only be emitted when beneficial
13936 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13937 //   // the rule numbers of these nodes inside are passed into the function below
13938 //   peepmatch ( incI_rReg movI );
13939 //   // the method that takes the responsibility of transformation
13940 //   peepprocedure ( inc_mov_to_lea );
13941 //   // the replacement is a leaI_rReg_immI; a lambda that creates this node
13942 //   // when invoked is passed into the function above
13943 //   peepreplace ( leaI_rReg_immI() );
13944 // %}
13945 
13946 // These instructions are not matched by the matcher but are used by the peephole rules
// 32-bit lea form of dst = src1 + src2, named as the replacement in a
// peephole rule below.  predicate(false) keeps the matcher from ever
// selecting it for AddI directly.
// The encoding avoids rbp/r13 as the base register: when src1 is one of them
// the operands are swapped, and the assert requires that src2 is not one of
// them as well.  (Presumably because rbp/r13 as a base needs a displacement
// in the x86 encoding - confirm against the assembler.)
13947 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
13948 %{
13949   predicate(false);
13950   match(Set dst (AddI src1 src2));
13951   format %{ "leal    $dst, [$src1 + $src2]" %}
13952   ins_encode %{
13953     Register dst = $dst$$Register;
13954     Register src1 = $src1$$Register;
13955     Register src2 = $src2$$Register;
13956     if (src1 != rbp && src1 != r13) {
13957       __ leal(dst, Address(src1, src2, Address::times_1));
13958     } else {
13959       assert(src2 != rbp && src2 != r13, "");
13960       __ leal(dst, Address(src2, src1, Address::times_1));
13961     }
13962   %}
13963   ins_pipe(ialu_reg_reg);
13964 %}
13965 
// 32-bit lea form of dst = src1 + constant, named as the replacement in
// peephole rules below.  predicate(false) keeps the matcher from ever
// selecting it for AddI directly.  The constant becomes the displacement of
// the address.
13966 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
13967 %{
13968   predicate(false);
13969   match(Set dst (AddI src1 src2));
13970   format %{ "leal    $dst, [$src1 + $src2]" %}
13971   ins_encode %{
13972     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
13973   %}
13974   ins_pipe(ialu_reg_reg);
13975 %}
13976 
// 32-bit lea form of dst = src << shift for an immI2 shift amount, named as
// the replacement in a peephole rule below.  predicate(false) keeps the
// matcher from ever selecting it for LShiftI directly.
// The shift constant is reinterpreted as an Address::ScaleFactor.  For a
// scale of times_2 with src not rbp/r13, the address [src + src*1] is used;
// otherwise the address has no base register and uses src as a scaled index.
13977 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
13978 %{
13979   predicate(false);
13980   match(Set dst (LShiftI src shift));
13981   format %{ "leal    $dst, [$src << $shift]" %}
13982   ins_encode %{
13983     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
13984     Register src = $src$$Register;
13985     if (scale == Address::times_2 && src != rbp && src != r13) {
13986       __ leal($dst$$Register, Address(src, src, Address::times_1));
13987     } else {
13988       __ leal($dst$$Register, Address(noreg, src, scale));
13989     }
13990   %}
13991   ins_pipe(ialu_reg_reg);
13992 %}
13993 
// 64-bit lea form of dst = src1 + src2, named as the replacement in a
// peephole rule below.  predicate(false) keeps the matcher from ever
// selecting it for AddL directly.
// As in the 32-bit variant, rbp/r13 are avoided as the base register: the
// operands are swapped when src1 is one of them, and the assert requires
// that src2 is not one of them as well.
13994 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
13995 %{
13996   predicate(false);
13997   match(Set dst (AddL src1 src2));
13998   format %{ "leaq    $dst, [$src1 + $src2]" %}
13999   ins_encode %{
14000     Register dst = $dst$$Register;
14001     Register src1 = $src1$$Register;
14002     Register src2 = $src2$$Register;
14003     if (src1 != rbp && src1 != r13) {
14004       __ leaq(dst, Address(src1, src2, Address::times_1));
14005     } else {
14006       assert(src2 != rbp && src2 != r13, "");
14007       __ leaq(dst, Address(src2, src1, Address::times_1));
14008     }
14009   %}
14010   ins_pipe(ialu_reg_reg);
14011 %}
14012 
// 64-bit lea form of dst = src1 + immL32 constant, named as the replacement
// in a peephole rule below.  predicate(false) keeps the matcher from ever
// selecting it for AddL directly.  The constant becomes the displacement of
// the address.
14013 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
14014 %{
14015   predicate(false);
14016   match(Set dst (AddL src1 src2));
14017   format %{ "leaq    $dst, [$src1 + $src2]" %}
14018   ins_encode %{
14019     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
14020   %}
14021   ins_pipe(ialu_reg_reg);
14022 %}
14023 
// 64-bit lea form of dst = src << shift for an immI2 shift amount.
// predicate(false) keeps the matcher from ever selecting it for LShiftL
// directly.
// The shift constant is reinterpreted as an Address::ScaleFactor.  For a
// scale of times_2 with src not rbp/r13, the address [src + src*1] is used;
// otherwise the address has no base register and uses src as a scaled index.
14024 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
14025 %{
14026   predicate(false);
14027   match(Set dst (LShiftL src shift));
14028   format %{ "leaq    $dst, [$src << $shift]" %}
14029   ins_encode %{
14030     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
14031     Register src = $src$$Register;
14032     if (scale == Address::times_2 && src != rbp && src != r13) {
14033       __ leaq($dst$$Register, Address(src, src, Address::times_1));
14034     } else {
14035       __ leaq($dst$$Register, Address(noreg, src, scale));
14036     }
14037   %}
14038   ins_pipe(ialu_reg_reg);
14039 %}
14040 
// Peephole rule for addI_rReg (32-bit reg + reg add): enabled only when
// VM_Version::supports_fast_2op_lea() holds; names lea_coalesce_reg as the
// procedure and leaI_rReg_rReg_peep as the replacement instruction.
// NOTE(review): lea_coalesce_reg is defined outside this chunk; presumably it
// decides whether the replacement is actually performed -- confirm.
14041 peephole
14042 %{
14043   peeppredicate(VM_Version::supports_fast_2op_lea());
14044   peepmatch (addI_rReg);
14045   peepprocedure (lea_coalesce_reg);
14046   peepreplace (leaI_rReg_rReg_peep());
14047 %}
14048 
// Peephole rule for addI_rReg_imm (32-bit reg + immediate add): enabled only
// when VM_Version::supports_fast_2op_lea() holds; names lea_coalesce_imm as
// the procedure and leaI_rReg_immI_peep as the replacement instruction.
// NOTE(review): lea_coalesce_imm is defined outside this chunk; presumably it
// decides whether the replacement is actually performed -- confirm.
14049 peephole
14050 %{
14051   peeppredicate(VM_Version::supports_fast_2op_lea());
14052   peepmatch (addI_rReg_imm);
14053   peepprocedure (lea_coalesce_imm);
14054   peepreplace (leaI_rReg_immI_peep());
14055 %}
14056 
// Peephole rule for incI_rReg: enabled only when
// VM_Version::supports_fast_2op_lea() holds; shares lea_coalesce_imm and the
// leaI_rReg_immI_peep replacement with the addI_rReg_imm rule.
// NOTE(review): lea_coalesce_imm is defined outside this chunk; presumably it
// decides whether the replacement is actually performed -- confirm.
14057 peephole
14058 %{
14059   peeppredicate(VM_Version::supports_fast_2op_lea());
14060   peepmatch (incI_rReg);
14061   peepprocedure (lea_coalesce_imm);
14062   peepreplace (leaI_rReg_immI_peep());
14063 %}
14064 
// Peephole rule for decI_rReg: enabled only when
// VM_Version::supports_fast_2op_lea() holds; shares lea_coalesce_imm and the
// leaI_rReg_immI_peep replacement with the addI_rReg_imm rule.
// NOTE(review): lea_coalesce_imm is defined outside this chunk; presumably it
// decides whether the replacement is actually performed -- confirm.
14065 peephole
14066 %{
14067   peeppredicate(VM_Version::supports_fast_2op_lea());
14068   peepmatch (decI_rReg);
14069   peepprocedure (lea_coalesce_imm);
14070   peepreplace (leaI_rReg_immI_peep());
14071 %}
14072 
// Peephole rule for salI_rReg_immI2 (32-bit shift left by an immI2 constant):
// enabled only when VM_Version::supports_fast_2op_lea() holds; names
// lea_coalesce_imm as the procedure and leaI_rReg_immI2_peep as the
// replacement instruction.
// NOTE(review): lea_coalesce_imm is defined outside this chunk; presumably it
// decides whether the replacement is actually performed -- confirm.
14073 peephole
14074 %{
14075   peeppredicate(VM_Version::supports_fast_2op_lea());
14076   peepmatch (salI_rReg_immI2);
14077   peepprocedure (lea_coalesce_imm);
14078   peepreplace (leaI_rReg_immI2_peep());
14079 %}
14080 
// Peephole rule for addL_rReg (64-bit reg + reg add): enabled only when
// VM_Version::supports_fast_2op_lea() holds; names lea_coalesce_reg as the
// procedure and leaL_rReg_rReg_peep as the replacement instruction.
// NOTE(review): lea_coalesce_reg is defined outside this chunk; presumably it
// decides whether the replacement is actually performed -- confirm.
14081 peephole
14082 %{
14083   peeppredicate(VM_Version::supports_fast_2op_lea());
14084   peepmatch (addL_rReg);
14085   peepprocedure (lea_coalesce_reg);
14086   peepreplace (leaL_rReg_rReg_peep());
14087 %}
14088 
// Peephole rule for addL_rReg_imm (64-bit reg + immediate add): enabled only
// when VM_Version::supports_fast_2op_lea() holds; names lea_coalesce_imm as
// the procedure and leaL_rReg_immL32_peep as the replacement instruction.
// NOTE(review): lea_coalesce_imm is defined outside this chunk; presumably it
// decides whether the replacement is actually performed -- confirm.
14089 peephole
14090 %{
14091   peeppredicate(VM_Version::supports_fast_2op_lea());
14092   peepmatch (addL_rReg_imm);
14093   peepprocedure (lea_coalesce_imm);
14094   peepreplace (leaL_rReg_immL32_peep());
14095 %}
14096 
// Peephole rule for incL_rReg: enabled only when
// VM_Version::supports_fast_2op_lea() holds; shares lea_coalesce_imm and the
// leaL_rReg_immL32_peep replacement with the addL_rReg_imm rule.
// NOTE(review): lea_coalesce_imm is defined outside this chunk; presumably it
// decides whether the replacement is actually performed -- confirm.
14097 peephole
14098 %{
14099   peeppredicate(VM_Version::supports_fast_2op_lea());
14100   peepmatch (incL_rReg);
14101   peepprocedure (lea_coalesce_imm);
14102   peepreplace (leaL_rReg_immL32_peep());
14103 %}
14104 
// Peephole rule for decL_rReg: enabled only when
// VM_Version::supports_fast_2op_lea() holds; shares lea_coalesce_imm and the
// leaL_rReg_immL32_peep replacement with the addL_rReg_imm rule.
// NOTE(review): lea_coalesce_imm is defined outside this chunk; presumably it
// decides whether the replacement is actually performed -- confirm.
14105 peephole
14106 %{
14107   peeppredicate(VM_Version::supports_fast_2op_lea());
14108   peepmatch (decL_rReg);
14109   peepprocedure (lea_coalesce_imm);
14110   peepreplace (leaL_rReg_immL32_peep());
14111 %}
14112 
// Peephole rule for salL_rReg_immI2 (64-bit shift left by an immI2 constant):
// enabled only when VM_Version::supports_fast_2op_lea() holds; names
// lea_coalesce_imm as the procedure and leaL_rReg_immI2_peep as the
// replacement instruction.
// NOTE(review): lea_coalesce_imm is defined outside this chunk; presumably it
// decides whether the replacement is actually performed -- confirm.
14113 peephole
14114 %{
14115   peeppredicate(VM_Version::supports_fast_2op_lea());
14116   peepmatch (salL_rReg_immI2);
14117   peepprocedure (lea_coalesce_imm);
14118   peepreplace (leaL_rReg_immI2_peep());
14119 %}
14120 
14121 //----------SMARTSPILL RULES---------------------------------------------------
14122 // These must follow all instruction definitions as they use the names
14123 // defined in the instruction definitions.