1 //
    2 // Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
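// Previously set RBX, RSI, and RDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on RSI and RDI as SOE registers.
// NOTE(review): the reg_defs below give every register SOC or NS as its
// Java save type; RSI/RDI are SOE only in the C-convention column, and only
// when _WIN64 is defined. The last sentence above looks stale -- confirm.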
   69 
// Each general register is described by two reg_defs: NAME is bound to the
// register's own VMReg, NAME_H to the VMReg that follows it
// (as_VMReg()->next()).  Both entries carry the same hardware encoding.
// Columns: (Java save type, C-convention save type, ideal type, encoding,
// VMReg).
reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

// RBX: save-on-call for Java code, save-on-entry under the C convention.
reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// RSP: no-save under both conventions.
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
// The C-convention save type of RSI and RDI depends on the platform:
// save-on-entry when _WIN64 is defined, save-on-call otherwise.  The Java
// save type is save-on-call in both cases.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif
  106 
// R8-R11: save-on-call under both the Java and the C convention.
reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15: save-on-call for Java code, save-on-entry under the C convention.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 
  132 // Floating Point Registers
  133 
  134 // Specify priority of register selection within phases of register
  135 // allocation.  Highest priority is first.  A useful heuristic is to
  136 // give registers a low priority when they are required by machine
  137 // instructions, like EAX and EDX on I486, and choose no-save registers
  138 // before save-on-call, & save-on-call before save-on-entry.  Registers
  139 // which participate in fixed calling sequences should come last.
  140 // Registers which are used as pairs must fall on an even boundary.
  141 
// Allocation order for the general registers.  Entries are listed from
// highest to lowest selection priority; every register appears together
// with its _H half so that pairs stay adjacent.  RSP comes last.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);
  158 
  159 
  160 //----------Architecture Description Register Classes--------------------------
  161 // Several register classes are automatically defined based upon information in
  162 // this architecture description.
  163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  164 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  165 //
  166 
// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers
// Lists all sixteen general registers, each with its _H half.  adlc
// generates _ALL_REG_mask from this class; reg_mask_init() uses that mask
// as the starting point for the pointer and long masks.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);
  187 
  188 // Class for all int registers
  189 reg_class all_int_reg(RAX
  190                       RDX,
  191                       RBP,
  192                       RDI,
  193                       RSI,
  194                       RCX,
  195                       RBX,
  196                       R8,
  197                       R9,
  198                       R10,
  199                       R11,
  200                       R12,
  201                       R13,
  202                       R14);
  203 
// The classes below return masks that reg_mask_init() computes at runtime.
// Two removals are conditional and are inherited by every derived mask:
// RBP is dropped when PreserveFramePointer is set, and R12 is dropped when
// need_r12_heapbase() is true.

// Class for all pointer registers
// (all_reg minus the conditional RBP / R12 removals; still contains RSP and R15)
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP and R15)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP, R15 and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX, RSP and R15)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, RSP and R15)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP and R15)
// The mask is a copy of the ptr_reg mask.
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX, RSP and R15)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX, RSP and R15)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP, R13, RSP and R15)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP and R15)
// Derived from all_int_reg, which already omits RSP and R15.
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, RSP and R15)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX, RSP and R15)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP, R13, RSP and R15)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
  268 
// Singleton classes pin an operand to one specific register.  The pointer
// and long variants list both entries of the register (NAME, NAME_H); the
// int variants list only NAME.

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);
  313 
  314 // Singleton class for instruction pointer
  315 // reg_class ip_reg(RIP);
  316 
  317 %}
  318 
  319 //----------SOURCE BLOCK-------------------------------------------------------
  320 // This is a block of C++ code which provides values, functions, and
  321 // definitions necessary in the rest of the architecture description
  322 
  323 source_hpp %{
  324 
  325 #include "peephole_x86_64.hpp"
  326 
  327 %}
  328 
  329 // Register masks
  330 source_hpp %{
  331 
// Masks backing the mask-returning reg_class definitions of this file.
// They are defined in the source block and filled in by reg_mask_init().
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// Register masks additionally OR-ed with STACK_OR_STACK_SLOTS_mask() by
// reg_mask_init().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Read-only accessors for the three stack-or-register masks.
inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
  354 
  355 %}
  356 
  357 source %{
// Shorthands for the Assembler operand kinds imm_operand / disp32_operand.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// "__ foo(...)" expands to "_masm.foo(...)", so every function that uses it
// needs a local variable or parameter named _masm.
#define __ _masm.

// Storage for the masks declared in the source_hpp block.  They are
// default-constructed here and receive their contents in reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
  380 
  381 static bool need_r12_heapbase() {
  382   return UseCompressedOops;
  383 }
  384 
  385 void reg_mask_init() {
  386   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
  387   // We derive a number of subsets from it.
  388   _ANY_REG_mask = _ALL_REG_mask;
  389 
  390   if (PreserveFramePointer) {
  391     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  392     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  393   }
  394   if (need_r12_heapbase()) {
  395     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  396     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
  397   }
  398 
  399   _PTR_REG_mask = _ANY_REG_mask;
  400   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
  401   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
  402   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
  403   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
  404 
  405   _STACK_OR_PTR_REG_mask = _PTR_REG_mask;
  406   _STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  407 
  408   _PTR_REG_NO_RBP_mask = _PTR_REG_mask;
  409   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  410   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  411 
  412   _PTR_NO_RAX_REG_mask = _PTR_REG_mask;
  413   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  414   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  415 
  416   _PTR_NO_RAX_RBX_REG_mask = _PTR_NO_RAX_REG_mask;
  417   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  418   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
  419 
  420   _LONG_REG_mask = _PTR_REG_mask;
  421   _STACK_OR_LONG_REG_mask = _LONG_REG_mask;
  422   _STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  423 
  424   _LONG_NO_RAX_RDX_REG_mask = _LONG_REG_mask;
  425   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  426   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  427   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  428   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
  429 
  430   _LONG_NO_RCX_REG_mask = _LONG_REG_mask;
  431   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  432   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
  433 
  434   _LONG_NO_RBP_R13_REG_mask = _LONG_REG_mask;
  435   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  436   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  437   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  438   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
  439 
  440   _INT_REG_mask = _ALL_INT_REG_mask;
  441   if (PreserveFramePointer) {
  442     _INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  443   }
  444   if (need_r12_heapbase()) {
  445     _INT_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  446   }
  447 
  448   _STACK_OR_INT_REG_mask = _INT_REG_mask;
  449   _STACK_OR_INT_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  450 
  451   _INT_NO_RAX_RDX_REG_mask = _INT_REG_mask;
  452   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  453   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  454 
  455   _INT_NO_RCX_REG_mask = _INT_REG_mask;
  456   _INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  457 
  458   _INT_NO_RBP_R13_REG_mask = _INT_REG_mask;
  459   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  460   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  461 
  462   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  463   // from the float_reg_legacy/float_reg_evex register class.
  464   _FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
  465 }
  466 
  467 static bool generate_vzeroupper(Compile* C) {
  468   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
  469 }
  470 
  471 static int clear_avx_size() {
  472   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
  473 }
  474 
  475 // !!!!! Special hack to get all types of calls to specify the byte offset
  476 //       from the start of the call to the point where the return address
  477 //       will point.
  478 int MachCallStaticJavaNode::ret_addr_offset()
  479 {
  480   int offset = 5; // 5 bytes from start of call to where return address points
  481   offset += clear_avx_size();
  482   return offset;
  483 }
  484 
  485 int MachCallDynamicJavaNode::ret_addr_offset()
  486 {
  487   int offset = 15; // 15 bytes from start of call to where return address points
  488   offset += clear_avx_size();
  489   return offset;
  490 }
  491 
  492 int MachCallRuntimeNode::ret_addr_offset() {
  493   if (_entry_point == nullptr) {
  494     // CallLeafNoFPInDirect
  495     return 3; // callq (register)
  496   }
  497   int offset = 13; // movq r10,#addr; callq (r10)
  498   if (this->ideal_Opcode() != Op_CallLeafVector) {
  499     offset += clear_avx_size();
  500   }
  501   return offset;
  502 }
  503 
  504 //
  505 // Compute padding required for nodes which need alignment
  506 //
  507 
  508 // The address of the call instruction needs to be 4-byte aligned to
  509 // ensure that it does not span a cache line so that it can be patched.
  510 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  511 {
  512   current_offset += clear_avx_size(); // skip vzeroupper
  513   current_offset += 1; // skip call opcode byte
  514   return align_up(current_offset, alignment_required()) - current_offset;
  515 }
  516 
  517 // The address of the call instruction needs to be 4-byte aligned to
  518 // ensure that it does not span a cache line so that it can be patched.
  519 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  520 {
  521   current_offset += clear_avx_size(); // skip vzeroupper
  522   current_offset += 11; // skip movq instruction + call opcode byte
  523   return align_up(current_offset, alignment_required()) - current_offset;
  524 }
  525 
  526 // This could be in MacroAssembler but it's fairly C2 specific
  527 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  528   Label exit;
  529   __ jccb(Assembler::noParity, exit);
  530   __ pushf();
  531   //
  532   // comiss/ucomiss instructions set ZF,PF,CF flags and
  533   // zero OF,AF,SF for NaN values.
  534   // Fixup flags by zeroing ZF,PF so that compare of NaN
  535   // values returns 'less than' result (CF is set).
  536   // Leave the rest of flags unchanged.
  537   //
  538   //    7 6 5 4 3 2 1 0
  539   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  540   //    0 0 1 0 1 0 1 1   (0x2B)
  541   //
  542   __ andq(Address(rsp, 0), 0xffffff2b);
  543   __ popf();
  544   __ bind(exit);
  545 }
  546 
  547 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  548   Label done;
  549   __ movl(dst, -1);
  550   __ jcc(Assembler::parity, done);
  551   __ jcc(Assembler::below, done);
  552   __ setb(Assembler::notEqual, dst);
  553   __ movzbl(dst, dst);
  554   __ bind(done);
  555 }
  556 
  557 // Math.min()    # Math.max()
  558 // --------------------------
  559 // ucomis[s/d]   #
  560 // ja   -> b     # a
  561 // jp   -> NaN   # NaN
  562 // jb   -> a     # b
  563 // je            #
  564 // |-jz -> a | b # a & b
  565 // |    -> a     #
  566 void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
  567                      XMMRegister a, XMMRegister b,
  568                      XMMRegister xmmt, Register rt,
  569                      bool min, bool single) {
  570 
  571   Label nan, zero, below, above, done;
  572 
  573   if (single)
  574     __ ucomiss(a, b);
  575   else
  576     __ ucomisd(a, b);
  577 
  578   if (dst->encoding() != (min ? b : a)->encoding())
  579     __ jccb(Assembler::above, above); // CF=0 & ZF=0
  580   else
  581     __ jccb(Assembler::above, done);
  582 
  583   __ jccb(Assembler::parity, nan);  // PF=1
  584   __ jccb(Assembler::below, below); // CF=1
  585 
  586   // equal
  587   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  588   if (single) {
  589     __ ucomiss(a, xmmt);
  590     __ jccb(Assembler::equal, zero);
  591 
  592     __ movflt(dst, a);
  593     __ jmp(done);
  594   }
  595   else {
  596     __ ucomisd(a, xmmt);
  597     __ jccb(Assembler::equal, zero);
  598 
  599     __ movdbl(dst, a);
  600     __ jmp(done);
  601   }
  602 
  603   __ bind(zero);
  604   if (min)
  605     __ vpor(dst, a, b, Assembler::AVX_128bit);
  606   else
  607     __ vpand(dst, a, b, Assembler::AVX_128bit);
  608 
  609   __ jmp(done);
  610 
  611   __ bind(above);
  612   if (single)
  613     __ movflt(dst, min ? b : a);
  614   else
  615     __ movdbl(dst, min ? b : a);
  616 
  617   __ jmp(done);
  618 
  619   __ bind(nan);
  620   if (single) {
  621     __ movl(rt, 0x7fc00000); // Float.NaN
  622     __ movdl(dst, rt);
  623   }
  624   else {
  625     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
  626     __ movdq(dst, rt);
  627   }
  628   __ jmp(done);
  629 
  630   __ bind(below);
  631   if (single)
  632     __ movflt(dst, min ? a : b);
  633   else
  634     __ movdbl(dst, min ? a : b);
  635 
  636   __ bind(done);
  637 }
  638 
  639 //=============================================================================
// The output register mask of MachConstantBaseNode is bound to the empty
// mask.  NOTE(review): presumably because the constant table is addressed
// absolutely (see calculate_table_base_offset) -- confirm.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  641 
  642 int ConstantTable::calculate_table_base_offset() const {
  643   return 0;  // absolute addressing, no offset
  644 }
  645 
  646 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  647 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  648   ShouldNotReachHere();
  649 }
  650 
  651 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  652   // Empty encoding
  653 }
  654 
  655 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  656   return 0;
  657 }
  658 
  659 #ifndef PRODUCT
  660 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  661   st->print("# MachConstantBaseNode (empty encoding)");
  662 }
  663 #endif
  664 
  665 
  666 //=============================================================================
  667 #ifndef PRODUCT
  668 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  669   Compile* C = ra_->C;
  670 
  671   int framesize = C->output()->frame_size_in_bytes();
  672   int bangsize = C->output()->bang_size_in_bytes();
  673   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  674   // Remove wordSize for return addr which is already pushed.
  675   framesize -= wordSize;
  676 
  677   if (C->output()->need_stack_bang(bangsize)) {
  678     framesize -= wordSize;
  679     st->print("# stack bang (%d bytes)", bangsize);
  680     st->print("\n\t");
  681     st->print("pushq   rbp\t# Save rbp");
  682     if (PreserveFramePointer) {
  683         st->print("\n\t");
  684         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  685     }
  686     if (framesize) {
  687       st->print("\n\t");
  688       st->print("subq    rsp, #%d\t# Create frame",framesize);
  689     }
  690   } else {
  691     st->print("subq    rsp, #%d\t# Create frame",framesize);
  692     st->print("\n\t");
  693     framesize -= wordSize;
  694     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  695     if (PreserveFramePointer) {
  696       st->print("\n\t");
  697       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  698       if (framesize > 0) {
  699         st->print("\n\t");
  700         st->print("addq    rbp, #%d", framesize);
  701       }
  702     }
  703   }
  704 
  705   if (VerifyStackAtCalls) {
  706     st->print("\n\t");
  707     framesize -= wordSize;
  708     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  709 #ifdef ASSERT
  710     st->print("\n\t");
  711     st->print("# stack alignment check");
  712 #endif
  713   }
  714   if (C->stub_function() != nullptr && BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) {
  715     st->print("\n\t");
  716     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
  717     st->print("\n\t");
  718     st->print("je      fast_entry\t");
  719     st->print("\n\t");
  720     st->print("call    #nmethod_entry_barrier_stub\t");
  721     st->print("\n\tfast_entry:");
  722   }
  723   st->cr();
  724 }
  725 #endif
  726 
  727 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  728   Compile* C = ra_->C;
  729   C2_MacroAssembler _masm(&cbuf);
  730 
  731   __ verified_entry(C);
  732 
  733   if (ra_->C->stub_function() == nullptr) {
  734     __ entry_barrier();
  735   }
  736 
  737   if (!Compile::current()->output()->in_scratch_emit_size()) {
  738     __ bind(*_verified_entry);
  739   }
  740 
  741   C->output()->set_frame_complete(cbuf.insts_size());
  742 
  743   if (C->has_mach_constant_base_node()) {
  744     // NOTE: We set the table base offset here because users might be
  745     // emitted before MachConstantBaseNode.
  746     ConstantTable& constant_table = C->output()->constant_table();
  747     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  748   }
  749 }
  750 
  751 int MachPrologNode::reloc() const
  752 {
  753   return 0; // a large enough number
  754 }
  755 
  756 //=============================================================================
  757 #ifndef PRODUCT
  758 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  759 {
  760   Compile* C = ra_->C;
  761   if (generate_vzeroupper(C)) {
  762     st->print("vzeroupper");
  763     st->cr(); st->print("\t");
  764   }
  765 
  766   int framesize = C->output()->frame_size_in_bytes();
  767   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  768   // Remove word for return adr already pushed
  769   // and RBP
  770   framesize -= 2*wordSize;
  771 
  772   if (framesize) {
  773     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
  774     st->print("\t");
  775   }
  776 
  777   st->print_cr("popq    rbp");
  778   if (do_polling() && C->is_method_compilation()) {
  779     st->print("\t");
  780     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
  781                  "ja      #safepoint_stub\t"
  782                  "# Safepoint: poll for GC");
  783   }
  784 }
  785 #endif
  786 
  787 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  788 {
  789   Compile* C = ra_->C;
  790   MacroAssembler _masm(&cbuf);
  791 
  792   if (generate_vzeroupper(C)) {
  793     // Clear upper bits of YMM registers when current compiled code uses
  794     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  795     __ vzeroupper();
  796   }
  797 
  798   // Subtract two words to account for return address and rbp
  799   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  800   __ remove_frame(initial_framesize, C->needs_stack_repair());
  801 
  802   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  803     __ reserved_stack_check();
  804   }
  805 
  806   if (do_polling() && C->is_method_compilation()) {
  807     MacroAssembler _masm(&cbuf);
  808     Label dummy_label;
  809     Label* code_stub = &dummy_label;
  810     if (!C->output()->in_scratch_emit_size()) {
  811       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  812       C->output()->add_stub(stub);
  813       code_stub = &stub->entry();
  814     }
  815     __ relocate(relocInfo::poll_return_type);
  816     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  817   }
  818 }
  819 
  820 int MachEpilogNode::reloc() const
  821 {
  822   return 2; // a large enough number
  823 }
  824 
  825 const Pipeline* MachEpilogNode::pipeline() const
  826 {
  827   return MachNode::pipeline_class();
  828 }
  829 
  830 //=============================================================================
  831 
// Coarse classification of an OptoReg, as computed by rc_class().
enum RC {
  rc_bad,     // not a valid OptoReg
  rc_int,     // general register (VMReg::is_Register)
  rc_kreg,    // K register (VMReg::is_KRegister)
  rc_float,   // XMM register (VMReg::is_XMMRegister)
  rc_stack    // stack slot
};
  839 
  840 static enum RC rc_class(OptoReg::Name reg)
  841 {
  842   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  843 
  844   if (OptoReg::is_stack(reg)) return rc_stack;
  845 
  846   VMReg r = OptoReg::as_VMReg(reg);
  847 
  848   if (r->is_Register()) return rc_int;
  849 
  850   if (r->is_KRegister()) return rc_kreg;
  851 
  852   assert(r->is_XMMRegister(), "must be");
  853   return rc_float;
  854 }
  855 
// Forward declarations of two vector helpers that are shared by the 32- and
// 64-bit VM. They are defined in x86.ad.

// Vector register-to-register move; used below for xmm -> xmm vector copies.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector move between a register and a stack slot. Callers below pass
// is_load == true for stack -> xmm and is_load == false for xmm -> stack.
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
  862 
  863 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  864                                       int dst_offset, uint ireg, outputStream* st) {
  865   if (cbuf) {
  866     MacroAssembler _masm(cbuf);
  867     switch (ireg) {
  868     case Op_VecS:
  869       __ movq(Address(rsp, -8), rax);
  870       __ movl(rax, Address(rsp, src_offset));
  871       __ movl(Address(rsp, dst_offset), rax);
  872       __ movq(rax, Address(rsp, -8));
  873       break;
  874     case Op_VecD:
  875       __ pushq(Address(rsp, src_offset));
  876       __ popq (Address(rsp, dst_offset));
  877       break;
  878     case Op_VecX:
  879       __ pushq(Address(rsp, src_offset));
  880       __ popq (Address(rsp, dst_offset));
  881       __ pushq(Address(rsp, src_offset+8));
  882       __ popq (Address(rsp, dst_offset+8));
  883       break;
  884     case Op_VecY:
  885       __ vmovdqu(Address(rsp, -32), xmm0);
  886       __ vmovdqu(xmm0, Address(rsp, src_offset));
  887       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  888       __ vmovdqu(xmm0, Address(rsp, -32));
  889       break;
  890     case Op_VecZ:
  891       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  892       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  893       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  894       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  895       break;
  896     default:
  897       ShouldNotReachHere();
  898     }
  899 #ifndef PRODUCT
  900   } else {
  901     switch (ireg) {
  902     case Op_VecS:
  903       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
  904                 "movl    rax, [rsp + #%d]\n\t"
  905                 "movl    [rsp + #%d], rax\n\t"
  906                 "movq    rax, [rsp - #8]",
  907                 src_offset, dst_offset);
  908       break;
  909     case Op_VecD:
  910       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  911                 "popq    [rsp + #%d]",
  912                 src_offset, dst_offset);
  913       break;
  914      case Op_VecX:
  915       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
  916                 "popq    [rsp + #%d]\n\t"
  917                 "pushq   [rsp + #%d]\n\t"
  918                 "popq    [rsp + #%d]",
  919                 src_offset, dst_offset, src_offset+8, dst_offset+8);
  920       break;
  921     case Op_VecY:
  922       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
  923                 "vmovdqu xmm0, [rsp + #%d]\n\t"
  924                 "vmovdqu [rsp + #%d], xmm0\n\t"
  925                 "vmovdqu xmm0, [rsp - #32]",
  926                 src_offset, dst_offset);
  927       break;
  928     case Op_VecZ:
  929       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
  930                 "vmovdqu xmm0, [rsp + #%d]\n\t"
  931                 "vmovdqu [rsp + #%d], xmm0\n\t"
  932                 "vmovdqu xmm0, [rsp - #64]",
  933                 src_offset, dst_offset);
  934       break;
  935     default:
  936       ShouldNotReachHere();
  937     }
  938 #endif
  939   }
  940 }
  941 
// Emit, or print, the machine code for a register-allocator spill copy.
//
// The source location is the allocation of in(1) and the destination is the
// allocation of this node. Each side is classified with rc_class() as stack
// slot, general purpose register, XMM register or K register, and the pair of
// classes selects the move instruction. Non-mask vector values are handled
// first through the vec_* helpers; everything else is a 32-bit or 64-bit
// scalar move, where "64-bit" means that both the source and the destination
// occupy an even/odd adjacent register pair (first, first + 1).
//
//   cbuf     when non-null, instructions are emitted into this buffer.
//   ra_      register allocator, queried for register pairs and stack offsets.
//   do_size  not read anywhere in this function.
//   st       when cbuf is null, receives the textual form of the instruction
//            (non-PRODUCT builds only).
//
// Every return statement in this function returns 0.
uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(cbuf != nullptr || st  != nullptr, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector values that are not vector masks: only stack and XMM locations are
  // legal, and the width comes from the ideal register type.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      // xmm -> xmm
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      // xmm -> mem
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      // mem -> xmm
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq    [rsp + #%d]",
                     src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // copy through rax, saving it at [rsp - 8] and restoring it afterwards.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl    rax, [rsp + #%d]\n\t"
                    "movl    [rsp + #%d], rax\n\t"
                    "movq    rax, [rsp - #8]",
                     src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, [rsp + #%d]\t# spill",
                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // mem -> kreg
      // Only the 64-bit pair case emits or prints anything; there is no
      // 32-bit branch here.
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
  #ifndef PRODUCT
        } else {
           st->print("kmovq   %s, %s\t# spill",
                       Matcher::regName[dst_first],
                       Matcher::regName[src_first]);
  #endif
        }
      }
      // NOTE(review): Unimplemented() is reached unconditionally here, even
      // after the 64-bit kmov above has been emitted or printed - confirm
      // that this is intentional.
      Unimplemented();
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // xmm -> kreg is rejected by the assert below.
      assert(false, "Illegal spilling");
      return 0;
    }
  } else if (src_first_rc == rc_kreg) {
    // kreg ->
    if (dst_first_rc == rc_stack) {
      // kreg -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   [rsp + #%d] , %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // kreg -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): as in the gpr -> kreg case, Unimplemented() is reached
      // unconditionally here - confirm that this is intentional.
      Unimplemented();
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // kreg -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // kreg -> xmm is rejected by the assert below.
      assert(false, "Illegal spill");
      return 0;
    }
  }

  // No source/destination class combination matched.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
 1401 
 1402 #ifndef PRODUCT
 1403 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1404   implementation(nullptr, ra_, false, st);
 1405 }
 1406 #endif
 1407 
 1408 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1409   implementation(&cbuf, ra_, false, nullptr);
 1410 }
 1411 
 1412 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1413   return MachNode::size(ra_);
 1414 }
 1415 
 1416 //=============================================================================
 1417 #ifndef PRODUCT
 1418 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1419 {
 1420   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1421   int reg = ra_->get_reg_first(this);
 1422   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1423             Matcher::regName[reg], offset);
 1424 }
 1425 #endif
 1426 
 1427 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1428 {
 1429   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1430   int reg = ra_->get_encode(this);
 1431 
 1432   MacroAssembler masm(&cbuf);
 1433   masm.lea(as_Register(reg), Address(rsp, offset));
 1434 }
 1435 
 1436 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1437 {
 1438   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1439   return (offset < 0x80) ? 5 : 8; // REX
 1440 }
 1441 
 1442 //=============================================================================
 1443 #ifndef PRODUCT
 1444 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1445 {
 1446   st->print_cr("MachVEPNode");
 1447 }
 1448 #endif
 1449 
// Emit the code for an inline-type entry point.
//
// When _verified is false this is an unverified entry: the klass in rax is
// compared against the klass of the receiver in j_rarg0 and a mismatch jumps
// to the inline cache miss stub. When _verified is true the inline type
// arguments are unpacked, the verified entry is emitted and control jumps to
// *_verified_entry. Either way the emitted code is padded with nops to a
// multiple of 4 bytes.
void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  C2_MacroAssembler _masm(&cbuf);
  // Remember where this node starts so the padding below can be computed.
  uint insts_size = cbuf.insts_size();
  if (!_verified) {
    // Inline cache check: rax versus the receiver's klass.
    if (UseCompressedClassPointers) {
      __ load_klass(rscratch1, j_rarg0, rscratch2);
      __ cmpptr(rax, rscratch1);
    } else {
      __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
    }
    __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  } else {
    // TODO 8284443 Avoid creation of temporary frame
    if (ra_->C->stub_function() == nullptr) {
      // Build a frame, run the entry barrier, then tear the frame down again.
      __ verified_entry(ra_->C, 0);
      __ entry_barrier();
      int initial_framesize = ra_->C->output()->frame_size_in_bytes() - 2*wordSize;
      __ remove_frame(initial_framesize, false);
    }
    // Unpack inline type args passed as oop and then jump to
    // the verified entry point (skipping the unverified entry).
    int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
    // Emit code for verified entry and save increment for stack repair on return
    __ verified_entry(ra_->C, sp_inc);
    if (Compile::current()->output()->in_scratch_emit_size()) {
      // Scratch emission jumps to a local dummy label instead of
      // *_verified_entry.
      Label dummy_verified_entry;
      __ jmp(dummy_verified_entry);
    } else {
      __ jmp(*_verified_entry);
    }
  }
  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0) {
    __ nop(nops_cnt);
  }
}
 1490 
 1491 //=============================================================================
 1492 #ifndef PRODUCT
 1493 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1494 {
 1495   if (UseCompressedClassPointers) {
 1496     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1497     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1498     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1499   } else {
 1500     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1501                  "# Inline cache check");
 1502   }
 1503   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1504   st->print_cr("\tnop\t# nops to align entry point");
 1505 }
 1506 #endif
 1507 
 1508 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1509 {
 1510   MacroAssembler masm(&cbuf);
 1511   uint insts_size = cbuf.insts_size();
 1512   if (UseCompressedClassPointers) {
 1513     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1514     masm.cmpptr(rax, rscratch1);
 1515   } else {
 1516     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1517   }
 1518 
 1519   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1520 
 1521   /* WARNING these NOPs are critical so that verified entry point is properly
 1522      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1523   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1524   if (OptoBreakpoint) {
 1525     // Leave space for int3
 1526     nops_cnt -= 1;
 1527   }
 1528   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1529   if (nops_cnt > 0)
 1530     masm.nop(nops_cnt);
 1531 }
 1532 
 1533 //=============================================================================
 1534 
 1535 bool Matcher::supports_vector_calling_convention(void) {
 1536   if (EnableVectorSupport && UseVectorStubs) {
 1537     return true;
 1538   }
 1539   return false;
 1540 }
 1541 
 1542 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1543   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1544   int lo = XMM0_num;
 1545   int hi = XMM0b_num;
 1546   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1547   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1548   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1549   return OptoRegPair(hi, lo);
 1550 }
 1551 
 1552 // Is this branch offset short enough that a short branch can be used?
 1553 //
 1554 // NOTE: If the platform does not provide any short branch variants, then
 1555 //       this method should return false for offset 0.
 1556 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1557   // The passed offset is relative to address of the branch.
 1558   // On 86 a branch displacement is calculated relative to address
 1559   // of a next instruction.
 1560   offset -= br_size;
 1561 
 1562   // the short version of jmpConUCF2 contains multiple branches,
 1563   // making the reach slightly less
 1564   if (rule == jmpConUCF2_rule)
 1565     return (-126 <= offset && offset <= 125);
 1566   return (-128 <= offset && offset <= 127);
 1567 }
 1568 
 1569 // Return whether or not this register is ever used as an argument.
 1570 // This function is used on startup to build the trampoline stubs in
 1571 // generateOptoStub.  Registers not mentioned will be killed by the VM
 1572 // call in the trampoline, and arguments in those registers not be
 1573 // available to the callee.
 1574 bool Matcher::can_be_java_arg(int reg)
 1575 {
 1576   return
 1577     reg ==  RDI_num || reg == RDI_H_num ||
 1578     reg ==  RSI_num || reg == RSI_H_num ||
 1579     reg ==  RDX_num || reg == RDX_H_num ||
 1580     reg ==  RCX_num || reg == RCX_H_num ||
 1581     reg ==   R8_num || reg ==  R8_H_num ||
 1582     reg ==   R9_num || reg ==  R9_H_num ||
 1583     reg ==  R12_num || reg == R12_H_num ||
 1584     reg == XMM0_num || reg == XMM0b_num ||
 1585     reg == XMM1_num || reg == XMM1b_num ||
 1586     reg == XMM2_num || reg == XMM2b_num ||
 1587     reg == XMM3_num || reg == XMM3b_num ||
 1588     reg == XMM4_num || reg == XMM4b_num ||
 1589     reg == XMM5_num || reg == XMM5b_num ||
 1590     reg == XMM6_num || reg == XMM6b_num ||
 1591     reg == XMM7_num || reg == XMM7b_num;
 1592 }
 1593 
 1594 bool Matcher::is_spillable_arg(int reg)
 1595 {
 1596   return can_be_java_arg(reg);
 1597 }
 1598 
 1599 uint Matcher::int_pressure_limit()
 1600 {
 1601   return (INTPRESSURE == -1) ? _INT_REG_mask.Size() : INTPRESSURE;
 1602 }
 1603 
 1604 uint Matcher::float_pressure_limit()
 1605 {
 1606   // After experiment around with different values, the following default threshold
 1607   // works best for LCM's register pressure scheduling on x64.
 1608   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 1609   uint default_float_pressure_threshold = _FLOAT_REG_mask.Size() - dec_count;
 1610   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 1611 }
 1612 
 1613 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1614   // In 64-bit mode, code that uses a multiply when the
 1615   // divisor is constant is faster than the hardware
 1616   // DIV instruction (it uses MulHiL).
        // Hence the answer is always false; the divisor argument is not examined.
 1617   return false;
 1618 }
 1619 
 1620 // Register for DIVI projection of divmodI:
      // the result is constrained to the int RAX register mask.
 1621 RegMask Matcher::divI_proj_mask() {
 1622   return INT_RAX_REG_mask();
 1623 }
 1624 
 1625 // Register for MODI projection of divmodI:
      // the result is constrained to the int RDX register mask.
 1626 RegMask Matcher::modI_proj_mask() {
 1627   return INT_RDX_REG_mask();
 1628 }
 1629 
 1630 // Register for DIVL projection of divmodL:
      // the result is constrained to the long RAX register mask.
 1631 RegMask Matcher::divL_proj_mask() {
 1632   return LONG_RAX_REG_mask();
 1633 }
 1634 
 1635 // Register for MODL projection of divmodL:
      // the result is constrained to the long RDX register mask.
 1636 RegMask Matcher::modL_proj_mask() {
 1637   return LONG_RDX_REG_mask();
 1638 }
 1639 
 1640 // Register for saving SP into on method handle invokes. Not used on x86_64,
      // so NO_REG_mask() is returned.
 1641 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1642     return NO_REG_mask();
 1643 }
 1644 
 1645 %}
 1646 
 1647 //----------ENCODING BLOCK-----------------------------------------------------
 1648 // This block specifies the encoding classes used by the compiler to
 1649 // output byte streams.  Encoding classes are parameterized macros
 1650 // used by Machine Instruction Nodes in order to generate the bit
 1651 // encoding of the instruction.  Operands specify their base encoding
 1652 // interface with the interface keyword.  Four interfaces are
 1653 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
 1654 // COND_INTER.  REG_INTER causes an operand to generate a function
 1655 // which returns its register number when queried.  CONST_INTER causes
 1656 // an operand to generate a function which returns the value of the
 1657 // constant when queried.  MEMORY_INTER causes an operand to generate
 1658 // four functions which return the Base Register, the Index Register,
 1659 // the Scale Value, and the Offset Value of the operand when queried.
 1660 // COND_INTER causes an operand to generate six functions which return
 1661 // the encoding code (ie - encoding bits for the instruction)
 1662 // associated with each basic boolean condition for a conditional
 1663 // instruction.
 1664 //
 1665 // Instructions specify two basic values for encoding.  Again, a
 1666 // function is available to check if the constant displacement is an
 1667 // oop. They use the ins_encode keyword to specify their encoding
 1668 // classes (which must be a sequence of enc_class names, and their
 1669 // parameters, specified in the encoding block), and they use the
 1670 // opcode keyword to specify, in order, their primary, secondary, and
 1671 // tertiary opcode.  Only the opcode sections which a particular
 1672 // instruction needs for encoding need to be specified.
 1673 encode %{
 1674   enc_class cdql_enc(no_rax_rdx_RegI div)
 1675   %{
 1676     // Full implementation of Java idiv and irem; checks for
 1677     // special case as described in JVM spec., p.243 & p.271.
 1678     //
 1679     //         normal case                           special case
 1680     //
 1681     // input : rax: dividend                         min_int
 1682     //         reg: divisor                          -1
 1683     //
 1684     // output: rax: quotient  (= rax idiv reg)       min_int
 1685     //         rdx: remainder (= rax irem reg)       0
 1686     //
 1687     //  Code sequence:
 1688     //
 1689     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 1690     //    5:   75 07/08                jne    e <normal>
 1691     //    7:   33 d2                   xor    %edx,%edx
 1692     //  [div >= 8 -> offset + 1]
 1693     //  [REX_B]
 1694     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 1695     //    c:   74 03/04                je     11 <done>
 1696     // 000000000000000e <normal>:
 1697     //    e:   99                      cltd
 1698     //  [div >= 8 -> offset + 1]
 1699     //  [REX_B]
 1700     //    f:   f7 f9                   idiv   $div
 1701     // 0000000000000011 <done>:
 1702     MacroAssembler _masm(&cbuf);
 1703     Label normal;
 1704     Label done;
 1705
 1706     // cmp    $0x80000000,%eax
 1707     __ cmpl(as_Register(RAX_enc), 0x80000000);
 1708
 1709     // jne    e <normal>
 1710     __ jccb(Assembler::notEqual, normal);
 1711
            // Dividend is min_int: clear the remainder first, so that when the
            // divisor turns out to be -1 the branch to <done> leaves
            // rax = min_int and rdx = 0 without executing idivl.
 1712     // xor    %edx,%edx
 1713     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 1714
 1715     // cmp    $0xffffffffffffffff,$div
 1716     __ cmpl($div$$Register, -1);
 1717
 1718     // je     11 <done>
 1719     __ jccb(Assembler::equal, done);
 1720
 1721     // <normal>
 1722     // cltd
 1723     __ bind(normal);
 1724     __ cdql();
 1725
 1726     // idivl
 1727     // <done>
 1728     __ idivl($div$$Register);
 1729     __ bind(done);
 1730   %}
 1731 
 1732   enc_class cdqq_enc(no_rax_rdx_RegL div)
 1733   %{
 1734     // Full implementation of Java ldiv and lrem; checks for
 1735     // special case as described in JVM spec., p.243 & p.271.
 1736     //
 1737     //         normal case                           special case
 1738     //
 1739     // input : rax: dividend                         min_long
 1740     //         reg: divisor                          -1
 1741     //
 1742     // output: rax: quotient  (= rax idiv reg)       min_long
 1743     //         rdx: remainder (= rax irem reg)       0
 1744     //
 1745     //  Code sequence:
 1746     //
 1747     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 1748     //    7:   00 00 80
 1749     //    a:   48 39 d0                cmp    %rdx,%rax
 1750     //    d:   75 08                   jne    17 <normal>
 1751     //    f:   33 d2                   xor    %edx,%edx
 1752     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 1753     //   15:   74 05                   je     1c <done>
 1754     // 0000000000000017 <normal>:
 1755     //   17:   48 99                   cqto
 1756     //   19:   48 f7 f9                idiv   $div
 1757     // 000000000000001c <done>:
 1758     MacroAssembler _masm(&cbuf);
 1759     Label normal;
 1760     Label done;
 1761
            // RDX serves as scratch for the min_long constant; it is overwritten
            // again below on both paths (xorl or cdqq).
 1762     // mov    $0x8000000000000000,%rdx
 1763     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 1764
 1765     // cmp    %rdx,%rax
 1766     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 1767
 1768     // jne    17 <normal>
 1769     __ jccb(Assembler::notEqual, normal);
 1770
            // Dividend is min_long: clear the remainder first, so that when the
            // divisor turns out to be -1 the branch to <done> leaves
            // rax = min_long and rdx = 0 without executing idivq.
 1771     // xor    %edx,%edx
 1772     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 1773
 1774     // cmp    $0xffffffffffffffff,$div
 1775     __ cmpq($div$$Register, -1);
 1776
 1777     // je     1c <done>
 1778     __ jccb(Assembler::equal, done);
 1779
 1780     // <normal>
 1781     // cqto
 1782     __ bind(normal);
 1783     __ cdqq();
 1784
 1785     // idivq (emitted here, as part of this encoding class)
 1786     // <done>
 1787     __ idivq($div$$Register);
 1788     __ bind(done);
 1789   %}
 1790 
 1791   enc_class enc_PartialSubtypeCheck()
 1792   %{
          // Emits check_klass_subtype_slow_path() with fixed registers: RSI is
          // the sub class, RAX the super class, RCX a killed temporary and RDI
          // the result register.  No success label is passed (nullptr); the
          // miss label is bound at the very end, so a jump to miss skips the
          // optional clearing of RDI below.
 1793     Register Rrdi = as_Register(RDI_enc); // result register
 1794     Register Rrax = as_Register(RAX_enc); // super class
 1795     Register Rrcx = as_Register(RCX_enc); // killed
 1796     Register Rrsi = as_Register(RSI_enc); // sub class
 1797     Label miss;
 1798     const bool set_cond_codes = true;
 1799
 1800     MacroAssembler _masm(&cbuf);
          // Pass the named constant instead of a bare literal so the local is
          // actually used and the argument documents itself.
 1801     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 1802                                      nullptr, &miss,
 1803                                      set_cond_codes);
 1804     if ($primary) {
            // Only users with a non-zero primary opcode get RDI zeroed on the
            // fall-through path.
 1805       __ xorptr(Rrdi, Rrdi);
 1806     }
 1807     __ bind(miss);
 1808   %}
 1809 
 1810   enc_class clear_avx %{
          // Debug builds record the code size before and after, so the assert
          // at the end can compare the emitted size against clear_avx_size().
 1811     debug_only(int off0 = cbuf.insts_size());
 1812     if (generate_vzeroupper(Compile::current())) {
 1813       // Clear the upper bits of the YMM registers when the current
 1814       // compiled code uses wide vectors, to avoid the AVX <-> SSE
 1815       // transition penalty during the call.
 1816       MacroAssembler _masm(&cbuf);
 1817       __ vzeroupper();
 1818     }
 1819     debug_only(int off1 = cbuf.insts_size());
 1820     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 1821   %}
 1822 
 1823   enc_class Java_To_Runtime(method meth) %{
 1824     // No relocation needed
          // The target address is loaded into r10 as a 64-bit immediate and
          // called indirectly through r10, followed by the post-call nop.
 1825     MacroAssembler _masm(&cbuf);
 1826     __ mov64(r10, (int64_t) $meth$$method);
 1827     __ call(r10);
 1828     __ post_call_nop();
 1829   %}
 1830 
 1831   enc_class Java_Static_Call(method meth)
 1832   %{
 1833     // JAVA STATIC CALL
 1834     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 1835     // determine who we intended to call.
          //
          // Three cases are distinguished below:
          //   1. no _method:  plain call to a runtime address;
          //   2. the _ensureMaterializedForStackWalk intrinsic:  the call is
          //      replaced by a 5-byte nop;
          //   3. otherwise:  a relocated call plus an interpreter stub (shared
          //      or freshly emitted).
          // All paths that do not bail out end with post_call_nop().
 1836     MacroAssembler _masm(&cbuf);
 1837
 1838     if (!_method) {
 1839       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 1840     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 1841       // The NOP here is purely to ensure that eliding a call to
 1842       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 1843       __ addr_nop_5();
 1844       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 1845     } else {
            // The relocation kind depends on _optimized_virtual; both kinds
            // carry the resolved method index.
 1846       int method_index = resolved_method_index(cbuf);
 1847       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1848                                                   : static_call_Relocation::spec(method_index);
            // Remember where the call starts: the pc for a freshly emitted stub,
            // the offset for a shared stub.
 1849       address mark = __ pc();
 1850       int call_offset = __ offset();
 1851       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 1852       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1853         // Calls of the same statically bound method can share
 1854         // a stub to the interpreter.
 1855         cbuf.shared_stub_to_interp_for(_method, call_offset);
 1856       } else {
 1857         // Emit stubs for static call.
 1858         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1859         if (stub == nullptr) {
                // Stub emission failed: record the failure and stop emitting
                // (no post_call_nop on this path).
 1860           ciEnv::current()->record_failure("CodeCache is full");
 1861           return;
 1862         }
 1863       }
 1864     }
 1865     __ post_call_nop();
 1866   %}
 1867 
 1868   enc_class Java_Dynamic_Call(method meth) %{
          // Emits an inline-cache call (ic_call) to the target with the
          // resolved method index, followed by the post-call nop.
 1869     MacroAssembler _masm(&cbuf);
 1870     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1871     __ post_call_nop();
 1872   %}
 1873 
 1874 %}
 1875 
 1876 
 1877 
 1878 //----------FRAME--------------------------------------------------------------
 1879 // Definition of frame structure and management information.
 1880 //
 1881 //  S T A C K   L A Y O U T    Allocators stack-slot number
 1882 //                             |   (to get allocators register number
 1883 //  G  Owned by    |        |  v    add OptoReg::stack0())
 1884 //  r   CALLER     |        |
 1885 //  o     |        +--------+      pad to even-align allocators stack-slot
 1886 //  w     V        |  pad0  |        numbers; owned by CALLER
 1887 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 1888 //  h     ^        |   in   |  5
 1889 //        |        |  args  |  4   Holes in incoming args owned by SELF
 1890 //  |     |        |        |  3
 1891 //  |     |        +--------+
 1892 //  V     |        | old out|      Empty on Intel, window on Sparc
 1893 //        |    old |preserve|      Must be even aligned.
 1894 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 1895 //        |        |   in   |  3   area for Intel ret address
 1896 //     Owned by    |preserve|      Empty on Sparc.
 1897 //       SELF      +--------+
 1898 //        |        |  pad2  |  2   pad to align old SP
 1899 //        |        +--------+  1
 1900 //        |        | locks  |  0
 1901 //        |        +--------+----> OptoReg::stack0(), even aligned
 1902 //        |        |  pad1  | 11   pad to align new SP
 1903 //        |        +--------+
 1904 //        |        |        | 10
 1905 //        |        | spills |  9   spills
 1906 //        V        |        |  8   (pad0 slot for callee)
 1907 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 1908 //        ^        |  out   |  7
 1909 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 1910 //     Owned by    +--------+
 1911 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 1912 //        |    new |preserve|      Must be even-aligned.
 1913 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 1914 //        |        |        |
 1915 //
 1916 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 1917 //         known from SELF's arguments and the Java calling convention.
 1918 //         Region 6-7 is determined per call site.
 1919 // Note 2: If the calling convention leaves holes in the incoming argument
 1920 //         area, those holes are owned by SELF.  Holes in the outgoing area
 1921 //         are owned by the CALLEE.  Holes should not be necessary in the
 1922 //         incoming area, as the Java calling convention is completely under
 1923 //         the control of the AD file.  Doubles can be sorted and packed to
 1924 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 1925 //         varargs C calling conventions.
 1926 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 1927 //         even aligned with pad0 as needed.
 1928 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 1929 //         region 6-11 is even aligned; it may be padded out more so that
 1930 //         the region from SP to FP meets the minimum stack alignment.
 1931 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 1932 //         alignment.  Region 11, pad1, may be dynamically extended so that
 1933 //         SP meets the minimum alignment.
 1934 
 1935 frame
 1936 %{
 1937   // The inline cache register is part of the calling convention
 1938   // between compiled code and the interpreter.
 1939   inline_cache_reg(RAX);                // Inline Cache Register
 1940
 1941   // Optional: name the operand used by cisc-spilling to access
 1942   // [stack_pointer + offset]
 1943   cisc_spilling_operand_name(indOffset32);
 1944
 1945   // Number of stack slots consumed by locking an object
 1946   sync_stack_slots(2);
 1947
 1948   // Compiled code's Frame Pointer
 1949   frame_pointer(RSP);
 1950
 1951   // Interpreter stores its frame pointer in a register which is
 1952   // stored to the stack by I2CAdaptors.
 1953   // I2CAdaptors convert from interpreted java to compiled java.
 1954   interpreter_frame_pointer(RBP);
 1955
 1956   // Stack alignment requirement
 1957   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 1958
 1959   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 1960   // for calls to C.  Supports the var-args backing area for register parms.
 1961   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 1962
 1963   // The after-PROLOG location of the return address.  Location of
 1964   // return address specifies a type (REG or STACK) and a number
 1965   // representing the register number (i.e. - use a register name) or
 1966   // stack slot.
 1967   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 1968   // Otherwise, it is above the locks and verification slot and alignment word
        // The slot is computed as: (in-preserve slots + fixed slots), rounded up
        // to the stack alignment in slots, minus 2.
 1969   return_addr(STACK - 2 +
 1970               align_up((Compile::current()->in_preserve_stack_slots() +
 1971                         Compile::current()->fixed_slots()),
 1972                        stack_alignment_in_slots()));
 1973
 1974   // Location of compiled Java return values.  Same as C for now.
 1975   return_value
 1976   %{
          // NOTE(review): the lower bound here is Op_RegI although both tables
          // below also carry an Op_RegN entry -- confirm that excluding Op_RegN
          // is intentional.
 1977     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 1978            "only return normal values");
 1979
          // Both tables are indexed by ideal_reg.  The two leading 0 entries fill
          // the indices that precede Op_RegN.  lo[] holds the first register of
          // the pair.
 1980     static const int lo[Op_RegL + 1] = {
 1981       0,
 1982       0,
 1983       RAX_num,  // Op_RegN
 1984       RAX_num,  // Op_RegI
 1985       RAX_num,  // Op_RegP
 1986       XMM0_num, // Op_RegF
 1987       XMM0_num, // Op_RegD
 1988       RAX_num   // Op_RegL
 1989     };
          // hi[] holds the second register of the pair, or OptoReg::Bad where
          // no second register is listed (Op_RegN, Op_RegI, Op_RegF).
 1990     static const int hi[Op_RegL + 1] = {
 1991       0,
 1992       0,
 1993       OptoReg::Bad, // Op_RegN
 1994       OptoReg::Bad, // Op_RegI
 1995       RAX_H_num,    // Op_RegP
 1996       OptoReg::Bad, // Op_RegF
 1997       XMM0b_num,    // Op_RegD
 1998       RAX_H_num     // Op_RegL
 1999     };
 2000     // Excluded flags and vector registers.
 2001     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 2002     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 2003   %}
 2004 %}
 2005 
 2006 //----------ATTRIBUTES---------------------------------------------------------
 2007 //----------Operand Attributes-------------------------------------------------
 2008 op_attrib op_cost(0);        // Required cost attribute; individual operands may set their own op_cost(n)
 2009
 2010 //----------Instruction Attributes---------------------------------------------
 2011 ins_attrib ins_cost(100);       // Required cost attribute
 2012 ins_attrib ins_size(8);         // Required size attribute (in bits)
 2013 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 2014                                 // a non-matching short branch variant
 2015                                 // of some long branch?
 2016 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 2017                                 // be a power of 2); specifies the
 2018                                 // alignment that some part of the
 2019                                 // instruction (not necessarily the
 2020                                 // start) requires.  If > 1, a
 2021                                 // compute_padding() function must be
 2022                                 // provided for the instruction.
 2023 
 2024 //----------OPERANDS-----------------------------------------------------------
 2025 // Operand definitions must precede instruction definitions for correct parsing
 2026 // in the ADLC because operands constitute user defined types which are used in
 2027 // instruction definitions.
 2028 
 2029 //----------Simple Operands----------------------------------------------------
 2030 // Immediate Operands
 2031 // Integer Immediate: matches any ConI node (no predicate).
 2032 operand immI()
 2033 %{
 2034   match(ConI);
 2035
 2036   op_cost(10);
 2037   format %{ %}
 2038   interface(CONST_INTER);
 2039 %}
 2040 
 2041 // Int constant 0, used for tests against zero
 2042 operand immI_0()
 2043 %{
 2044   predicate(n->get_int() == 0);
 2045   match(ConI);
 2046
 2047   op_cost(0);
 2048   format %{ %}
 2049   interface(CONST_INTER);
 2050 %}
 2051 
 2052 // Int constant 1, used for increment
 2053 operand immI_1()
 2054 %{
 2055   predicate(n->get_int() == 1);
 2056   match(ConI);
 2057
 2058   op_cost(0);
 2059   format %{ %}
 2060   interface(CONST_INTER);
 2061 %}
 2062 
 2063 // Int constant -1, used for decrement
 2064 operand immI_M1()
 2065 %{
 2066   predicate(n->get_int() == -1);
 2067   match(ConI);
 2068
 2069   op_cost(0);
 2070   format %{ %}
 2071   interface(CONST_INTER);
 2072 %}
 2073 
 2074 operand immI_2()
 2075 %{
        // Matches only the int constant 2.
 2076   predicate(n->get_int() == 2);
 2077   match(ConI);
 2078
 2079   op_cost(0);
 2080   format %{ %}
 2081   interface(CONST_INTER);
 2082 %}
 2083 
 2084 operand immI_4()
 2085 %{
        // Matches only the int constant 4.
 2086   predicate(n->get_int() == 4);
 2087   match(ConI);
 2088
 2089   op_cost(0);
 2090   format %{ %}
 2091   interface(CONST_INTER);
 2092 %}
 2093 
 2094 operand immI_8()
 2095 %{
        // Matches only the int constant 8.
 2096   predicate(n->get_int() == 8);
 2097   match(ConI);
 2098
 2099   op_cost(0);
 2100   format %{ %}
 2101   interface(CONST_INTER);
 2102 %}
 2103 
 2104 // Valid scale values for addressing modes: int constants 0 through 3
 2105 operand immI2()
 2106 %{
 2107   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 2108   match(ConI);
 2109
 2110   format %{ %}
 2111   interface(CONST_INTER);
 2112 %}
 2113 
 2114 operand immU7()
 2115 %{
        // Unsigned 7-bit int constant: 0 through 0x7F inclusive.
 2116   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 2117   match(ConI);
 2118
 2119   op_cost(5);
 2120   format %{ %}
 2121   interface(CONST_INTER);
 2122 %}
 2123 
 2124 operand immI8()
 2125 %{
        // Signed 8-bit int constant: -0x80 through 0x7F inclusive.
 2126   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 2127   match(ConI);
 2128
 2129   op_cost(5);
 2130   format %{ %}
 2131   interface(CONST_INTER);
 2132 %}
 2133 
 2134 operand immU8()
 2135 %{
        // Unsigned 8-bit int constant: 0 through 255 inclusive.
 2136   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 2137   match(ConI);
 2138
 2139   op_cost(5);
 2140   format %{ %}
 2141   interface(CONST_INTER);
 2142 %}
 2143 
 2144 operand immI16()
 2145 %{
        // Signed 16-bit int constant: -32768 through 32767 inclusive.
 2146   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 2147   match(ConI);
 2148
 2149   op_cost(10);
 2150   format %{ %}
 2151   interface(CONST_INTER);
 2152 %}
 2153 
 2154 // Int Immediate non-negative (any int constant >= 0)
 2155 operand immU31()
 2156 %{
 2157   predicate(n->get_int() >= 0);
 2158   match(ConI);
 2159
 2160   op_cost(0);
 2161   format %{ %}
 2162   interface(CONST_INTER);
 2163 %}
 2164 
 2165 // Int constant 32, used for long shifts
 2166 operand immI_32()
 2167 %{
 2168   predicate( n->get_int() == 32 );
 2169   match(ConI);
 2170
 2171   op_cost(0);
 2172   format %{ %}
 2173   interface(CONST_INTER);
 2174 %}
 2175 
 2176 // Int constant 64, used for long shifts
 2177 operand immI_64()
 2178 %{
 2179   predicate( n->get_int() == 64 );
 2180   match(ConI);
 2181
 2182   op_cost(0);
 2183   format %{ %}
 2184   interface(CONST_INTER);
 2185 %}
 2186 
 2187 // Pointer Immediate: matches any ConP node (no predicate)
 2188 operand immP()
 2189 %{
 2190   match(ConP);
 2191
 2192   op_cost(10);
 2193   format %{ %}
 2194   interface(CONST_INTER);
 2195 %}
 2196 
 2197 // nullptr Pointer Immediate: a ConP whose pointer value is 0
 2198 operand immP0()
 2199 %{
 2200   predicate(n->get_ptr() == 0);
 2201   match(ConP);
 2202
 2203   op_cost(5);
 2204   format %{ %}
 2205   interface(CONST_INTER);
 2206 %}
 2207 
 2208 // Narrow Pointer Immediate: matches any ConN node (no predicate)
 2209 operand immN() %{
 2210   match(ConN);
 2211
 2212   op_cost(10);
 2213   format %{ %}
 2214   interface(CONST_INTER);
 2215 %}
 2216 
 2217 operand immNKlass() %{
        // Matches any ConNKlass node (no predicate).
 2218   match(ConNKlass);
 2219
 2220   op_cost(10);
 2221   format %{ %}
 2222   interface(CONST_INTER);
 2223 %}
 2224 
 2225 // Narrow nullptr Pointer Immediate: a ConN whose narrow constant is 0
 2226 operand immN0() %{
 2227   predicate(n->get_narrowcon() == 0);
 2228   match(ConN);
 2229
 2230   op_cost(5);
 2231   format %{ %}
 2232   interface(CONST_INTER);
 2233 %}
 2234 
 2235 operand immP31()
 2236 %{
        // Pointer constant that carries no relocation and whose value,
        // shifted right by 31 bits, is zero.
 2237   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 2238             && (n->get_ptr() >> 31) == 0);
 2239   match(ConP);
 2240
 2241   op_cost(5);
 2242   format %{ %}
 2243   interface(CONST_INTER);
 2244 %}
 2245 
 2246 
 2247 // Long Immediate: matches any ConL node (no predicate)
 2248 operand immL()
 2249 %{
 2250   match(ConL);
 2251
 2252   op_cost(20);
 2253   format %{ %}
 2254   interface(CONST_INTER);
 2255 %}
 2256 
 2257 // Long Immediate 8-bit: signed range -0x80 through 0x7F inclusive
 2258 operand immL8()
 2259 %{
 2260   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 2261   match(ConL);
 2262
 2263   op_cost(5);
 2264   format %{ %}
 2265   interface(CONST_INTER);
 2266 %}
 2267 
 2268 // Long Immediate 32-bit unsigned: the value survives a round trip
      // through unsigned int unchanged
 2269 operand immUL32()
 2270 %{
 2271   predicate(n->get_long() == (unsigned int) (n->get_long()));
 2272   match(ConL);
 2273
 2274   op_cost(10);
 2275   format %{ %}
 2276   interface(CONST_INTER);
 2277 %}
 2278 
 2279 // Long Immediate 32-bit signed: the value survives a round trip
      // through int unchanged
 2280 operand immL32()
 2281 %{
 2282   predicate(n->get_long() == (int) (n->get_long()));
 2283   match(ConL);
 2284
 2285   op_cost(15);
 2286   format %{ %}
 2287   interface(CONST_INTER);
 2288 %}
 2289 
 2290 operand immL_Pow2()
 2291 %{
        // Long constant that is a power of two when viewed as an unsigned
        // 64-bit value (julong).
 2292   predicate(is_power_of_2((julong)n->get_long()));
 2293   match(ConL);
 2294
 2295   op_cost(15);
 2296   format %{ %}
 2297   interface(CONST_INTER);
 2298 %}
 2299 
 2300 operand immL_NotPow2()
 2301 %{
        // Long constant whose bitwise complement is a power of two when
        // viewed as an unsigned 64-bit value (julong).
 2302   predicate(is_power_of_2((julong)~n->get_long()));
 2303   match(ConL);
 2304
 2305   op_cost(15);
 2306   format %{ %}
 2307   interface(CONST_INTER);
 2308 %}
 2309 
 2310 // Long Immediate zero: matches only the long constant 0
 2311 operand immL0()
 2312 %{
 2313   predicate(n->get_long() == 0L);
 2314   match(ConL);
 2315
 2316   op_cost(10);
 2317   format %{ %}
 2318   interface(CONST_INTER);
 2319 %}
 2320 
 2321 // Long constant 1, used for increment
 2322 operand immL1()
 2323 %{
 2324   predicate(n->get_long() == 1);
 2325   match(ConL);
 2326
 2327   format %{ %}
 2328   interface(CONST_INTER);
 2329 %}
 2330 
 2331 // Long constant -1, used for decrement
 2332 operand immL_M1()
 2333 %{
 2334   predicate(n->get_long() == -1);
 2335   match(ConL);
 2336
 2337   format %{ %}
 2338   interface(CONST_INTER);
 2339 %}
 2340 
 2341 // Long Immediate: matches only the long constant 10
 2342 operand immL10()
 2343 %{
 2344   predicate(n->get_long() == 10);
 2345   match(ConL);
 2346
 2347   format %{ %}
 2348   interface(CONST_INTER);
 2349 %}
 2350 
 2351 // Long immediate from 0 to 127 (inclusive).
 2352 // Used for a shorter form of long mul by 10.
 2353 operand immL_127()
 2354 %{
 2355   predicate(0 <= n->get_long() && n->get_long() < 0x80);
 2356   match(ConL);
 2357
 2358   op_cost(10);
 2359   format %{ %}
 2360   interface(CONST_INTER);
 2361 %}
 2362 
 2363 // Long Immediate: low 32-bit mask (exactly 0xFFFFFFFF)
 2364 operand immL_32bits()
 2365 %{
 2366   predicate(n->get_long() == 0xFFFFFFFFL);
 2367   match(ConL);
 2368   op_cost(20);
 2369
 2370   format %{ %}
 2371   interface(CONST_INTER);
 2372 %}
 2373 
 2374 // Int Immediate: 2^n-1, positive
      // (the value is > 0 and value + 1, computed as juint, is a power of two)
 2375 operand immI_Pow2M1()
 2376 %{
 2377   predicate((n->get_int() > 0)
 2378             && is_power_of_2((juint)n->get_int() + 1));
 2379   match(ConI);
 2380
 2381   op_cost(20);
 2382   format %{ %}
 2383   interface(CONST_INTER);
 2384 %}
 2385 
 2386 // Float Immediate zero
      // The test is on jint_cast of the float being 0, not on a float compare.
      // NOTE(review): presumably a bit-pattern test, so only +0.0f would
      // match -- confirm against jint_cast.
 2387 operand immF0()
 2388 %{
 2389   predicate(jint_cast(n->getf()) == 0);
 2390   match(ConF);
 2391
 2392   op_cost(5);
 2393   format %{ %}
 2394   interface(CONST_INTER);
 2395 %}
 2396 
 2397 // Float Immediate: matches any ConF node (no predicate)
 2398 operand immF()
 2399 %{
 2400   match(ConF);
 2401
 2402   op_cost(15);
 2403   format %{ %}
 2404   interface(CONST_INTER);
 2405 %}
 2406 
 2407 // Double Immediate zero
      // The test is on jlong_cast of the double being 0, not on a double compare.
      // NOTE(review): presumably a bit-pattern test, so only +0.0 would
      // match -- confirm against jlong_cast.
 2408 operand immD0()
 2409 %{
 2410   predicate(jlong_cast(n->getd()) == 0);
 2411   match(ConD);
 2412
 2413   op_cost(5);
 2414   format %{ %}
 2415   interface(CONST_INTER);
 2416 %}
 2417 
 2418 // Double Immediate: matches any ConD node (no predicate)
 2419 operand immD()
 2420 %{
 2421   match(ConD);
 2422
 2423   op_cost(15);
 2424   format %{ %}
 2425   interface(CONST_INTER);
 2426 %}
 2427 
 2428 // Immediates for special shifts (sign extend)
 2429 
 2430 // Int constant 16 (one of the special shift immediates)
 2431 operand immI_16()
 2432 %{
 2433   predicate(n->get_int() == 16);
 2434   match(ConI);
 2435
 2436   format %{ %}
 2437   interface(CONST_INTER);
 2438 %}
 2439 
 2440 operand immI_24()
 2441 %{
        // Matches only the int constant 24.
 2442   predicate(n->get_int() == 24);
 2443   match(ConI);
 2444
 2445   format %{ %}
 2446   interface(CONST_INTER);
 2447 %}
 2448 
 2449 // Int constant 255, used for byte-wide masking
 2450 operand immI_255()
 2451 %{
 2452   predicate(n->get_int() == 255);
 2453   match(ConI);
 2454
 2455   format %{ %}
 2456   interface(CONST_INTER);
 2457 %}
 2458 
 2459 // Int constant 65535, used for short-wide masking
 2460 operand immI_65535()
 2461 %{
 2462   predicate(n->get_int() == 65535);
 2463   match(ConI);
 2464
 2465   format %{ %}
 2466   interface(CONST_INTER);
 2467 %}
 2468 
 2469 // Long constant 255, used for byte-wide masking
 2470 operand immL_255()
 2471 %{
 2472   predicate(n->get_long() == 255);
 2473   match(ConL);
 2474
 2475   format %{ %}
 2476   interface(CONST_INTER);
 2477 %}
 2478 
 2479 // Long constant 65535, used for short-wide masking
 2480 operand immL_65535()
 2481 %{
 2482   predicate(n->get_long() == 65535);
 2483   match(ConL);
 2484
 2485   format %{ %}
 2486   interface(CONST_INTER);
 2487 %}
 2488 
 2489 operand kReg()
 2490 %{
        // Vector mask register operand: matches RegVectMask and is allocated
        // from the vectmask_reg class.
 2491   constraint(ALLOC_IN_RC(vectmask_reg));
 2492   match(RegVectMask);
 2493   format %{%}
 2494   interface(REG_INTER);
 2495 %}
 2496 
 2497 operand kReg_K1()
 2498 %{
        // Vector mask operand restricted to the vectmask_reg_K1 class.
 2499   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 2500   match(RegVectMask);
 2501   format %{%}
 2502   interface(REG_INTER);
 2503 %}
 2504 
 2505 operand kReg_K2()
 2506 %{
        // Vector mask operand restricted to the vectmask_reg_K2 class.
 2507   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 2508   match(RegVectMask);
 2509   format %{%}
 2510   interface(REG_INTER);
 2511 %}
 2512 
 2513 // Vector mask operand restricted to the vectmask_reg_K3 class
 2514 operand kReg_K3()
 2515 %{
 2516   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 2517   match(RegVectMask);
 2518   format %{%}
 2519   interface(REG_INTER);
 2520 %}
 2521 
 2522 operand kReg_K4()
 2523 %{
        // Vector mask operand restricted to the vectmask_reg_K4 class.
 2524   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 2525   match(RegVectMask);
 2526   format %{%}
 2527   interface(REG_INTER);
 2528 %}
 2529 
 2530 operand kReg_K5()
 2531 %{
        // Vector mask operand restricted to the vectmask_reg_K5 class.
 2532   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 2533   match(RegVectMask);
 2534   format %{%}
 2535   interface(REG_INTER);
 2536 %}
 2537 
 2538 operand kReg_K6()
 2539 %{
        // Vector mask operand restricted to the vectmask_reg_K6 class.
 2540   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 2541   match(RegVectMask);
 2542   format %{%}
 2543   interface(REG_INTER);
 2544 %}
 2545 
 2546 // Vector mask operand restricted to the vectmask_reg_K7 class
 2547 operand kReg_K7()
 2548 %{
 2549   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 2550   match(RegVectMask);
 2551   format %{%}
 2552   interface(REG_INTER);
 2553 %}
 2554 
 2555 // Register Operands
 2556 // Integer Register: allocated from int_reg; besides RegI it also accepts
      // the single-register int operands rax/rbx/rcx/rdx/rdi_RegI.
 2557 operand rRegI()
 2558 %{
 2559   constraint(ALLOC_IN_RC(int_reg));
 2560   match(RegI);
 2561
 2562   match(rax_RegI);
 2563   match(rbx_RegI);
 2564   match(rcx_RegI);
 2565   match(rdx_RegI);
 2566   match(rdi_RegI);
 2567
 2568   format %{ %}
 2569   interface(REG_INTER);
 2570 %}
 2571 
 2572 // Special Registers: int operand allocated from the int_rax_reg class
 2573 operand rax_RegI()
 2574 %{
 2575   constraint(ALLOC_IN_RC(int_rax_reg));
 2576   match(RegI);
 2577   match(rRegI);
 2578
 2579   format %{ "RAX" %}
 2580   interface(REG_INTER);
 2581 %}
 2582 
 2583 // Special Registers: int operand allocated from the int_rbx_reg class
 2584 operand rbx_RegI()
 2585 %{
 2586   constraint(ALLOC_IN_RC(int_rbx_reg));
 2587   match(RegI);
 2588   match(rRegI);
 2589
 2590   format %{ "RBX" %}
 2591   interface(REG_INTER);
 2592 %}
 2593 
 2594 operand rcx_RegI()
 2595 %{
        // Int operand allocated from the int_rcx_reg class.
 2596   constraint(ALLOC_IN_RC(int_rcx_reg));
 2597   match(RegI);
 2598   match(rRegI);
 2599
 2600   format %{ "RCX" %}
 2601   interface(REG_INTER);
 2602 %}
 2603 
 2604 operand rdx_RegI()
 2605 %{
        // Int operand allocated from the int_rdx_reg class.
 2606   constraint(ALLOC_IN_RC(int_rdx_reg));
 2607   match(RegI);
 2608   match(rRegI);
 2609
 2610   format %{ "RDX" %}
 2611   interface(REG_INTER);
 2612 %}
 2613 
 2614 operand rdi_RegI()
 2615 %{
        // Int operand allocated from the int_rdi_reg class.
 2616   constraint(ALLOC_IN_RC(int_rdi_reg));
 2617   match(RegI);
 2618   match(rRegI);
 2619
 2620   format %{ "RDI" %}
 2621   interface(REG_INTER);
 2622 %}
 2623 
 2624 operand no_rax_rdx_RegI()
 2625 %{
        // Int operand allocated from int_no_rax_rdx_reg; used as the divisor
        // operand type of cdql_enc.  Of the single-register int operands only
        // rbx/rcx/rdi_RegI are accepted (rax_RegI and rdx_RegI are not listed).
 2626   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 2627   match(RegI);
 2628   match(rbx_RegI);
 2629   match(rcx_RegI);
 2630   match(rdi_RegI);
 2631
 2632   format %{ %}
 2633   interface(REG_INTER);
 2634 %}
 2635 
 2636 operand no_rbp_r13_RegI()
 2637 %{
        // Int operand allocated from int_no_rbp_r13_reg; accepts RegI, rRegI
        // and the single-register int operands listed below.
 2638   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 2639   match(RegI);
 2640   match(rRegI);
 2641   match(rax_RegI);
 2642   match(rbx_RegI);
 2643   match(rcx_RegI);
 2644   match(rdx_RegI);
 2645   match(rdi_RegI);
 2646
 2647   format %{ %}
 2648   interface(REG_INTER);
 2649 %}
 2650 
 2651 // Pointer Register: allocated from any_reg; accepts RegP, rRegP and the
      // single-register pointer operands listed below.
 2652 operand any_RegP()
 2653 %{
 2654   constraint(ALLOC_IN_RC(any_reg));
 2655   match(RegP);
 2656   match(rax_RegP);
 2657   match(rbx_RegP);
 2658   match(rdi_RegP);
 2659   match(rsi_RegP);
 2660   match(rbp_RegP);
 2661   match(r15_RegP);
 2662   match(rRegP);
 2663
 2664   format %{ %}
 2665   interface(REG_INTER);
 2666 %}
 2667 
 2668 operand rRegP()
 2669 %{
        // General pointer register operand, allocated from ptr_reg.
 2670   constraint(ALLOC_IN_RC(ptr_reg));
 2671   match(RegP);
 2672   match(rax_RegP);
 2673   match(rbx_RegP);
 2674   match(rdi_RegP);
 2675   match(rsi_RegP);
 2676   match(rbp_RegP);  // See Q&A below about
 2677   match(r15_RegP);  // r15_RegP and rbp_RegP.
 2678
 2679   format %{ %}
 2680   interface(REG_INTER);
 2681 %}
 2682 
 2683 operand rRegN() %{
        // Narrow (RegN) register operand. Note that it is allocated from the
        // int_reg class, the same class name used for int operands.
 2684   constraint(ALLOC_IN_RC(int_reg));
 2685   match(RegN);
 2686
 2687   format %{ %}
 2688   interface(REG_INTER);
 2689 %}
 2690 
 2691 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 2692 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 2693 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 2694 // The output of an instruction is controlled by the allocator, which respects
 2695 // register class masks, not match rules.  Unless an instruction mentions
 2696 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 2697 // by the allocator as an input.
 2698 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 2699 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 2700 // result, RBP is not included in the output of the instruction either.
 2701 
 2702 operand no_rax_RegP()
 2703 %{
        // Pointer register operand allocated from the ptr_no_rax_reg class.
        // As an input it accepts the ideal RegP and the rbx/rsi/rdi pinned
        // pointer operands; rax_RegP is not listed.
 2704   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
 2705   match(RegP);
 2706   match(rbx_RegP);
 2707   match(rsi_RegP);
 2708   match(rdi_RegP);
 2709
 2710   format %{ %}
 2711   interface(REG_INTER);
 2712 %}
 2713 
 2714 // This operand is not allowed to use RBP even if
 2715 // RBP is not used to hold the frame pointer.
 2716 operand no_rbp_RegP()
 2717 %{
        // Pointer register operand allocated from the ptr_reg_no_rbp class.
        // As an input it accepts the ideal RegP and the rbx/rsi/rdi pinned
        // pointer operands; rbp_RegP is not listed.
 2718   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 2719   match(RegP);
 2720   match(rbx_RegP);
 2721   match(rsi_RegP);
 2722   match(rdi_RegP);
 2723
 2724   format %{ %}
 2725   interface(REG_INTER);
 2726 %}
 2727 
 2728 operand no_rax_rbx_RegP()
 2729 %{
        // Pointer register operand allocated from the ptr_no_rax_rbx_reg class.
        // As an input it accepts the ideal RegP and the rsi/rdi pinned pointer
        // operands; the rax and rbx pinned operands are not listed.
 2730   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
 2731   match(RegP);
 2732   match(rsi_RegP);
 2733   match(rdi_RegP);
 2734
 2735   format %{ %}
 2736   interface(REG_INTER);
 2737 %}
 2738 
 2739 // Special Registers
 2740 // Return a pointer value
 2741 operand rax_RegP()
 2742 %{
        // Pointer register operand allocated from the ptr_rax_reg class.
        // An input declared as rax_RegP accepts an ideal RegP or an rRegP operand.
 2743   constraint(ALLOC_IN_RC(ptr_rax_reg));
 2744   match(RegP);
 2745   match(rRegP);
 2746
 2747   format %{ %}
 2748   interface(REG_INTER);
 2749 %}
 2750 
 2751 // Special Registers
 2752 // Return a compressed pointer value
 2753 operand rax_RegN()
 2754 %{
        // Narrow (RegN) register operand allocated from the int_rax_reg class.
        // An input declared as rax_RegN accepts an ideal RegN or an rRegN operand.
 2755   constraint(ALLOC_IN_RC(int_rax_reg));
 2756   match(RegN);
 2757   match(rRegN);
 2758
 2759   format %{ %}
 2760   interface(REG_INTER);
 2761 %}
 2762 
 2763 // Used in AtomicAdd
 2764 operand rbx_RegP()
 2765 %{
        // Pointer register operand allocated from the ptr_rbx_reg class.
        // An input declared as rbx_RegP accepts an ideal RegP or an rRegP operand.
 2766   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 2767   match(RegP);
 2768   match(rRegP);
 2769
 2770   format %{ %}
 2771   interface(REG_INTER);
 2772 %}
 2773 
 2774 operand rsi_RegP()
 2775 %{
        // Pointer register operand allocated from the ptr_rsi_reg class.
        // An input declared as rsi_RegP accepts an ideal RegP or an rRegP operand.
 2776   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 2777   match(RegP);
 2778   match(rRegP);
 2779
 2780   format %{ %}
 2781   interface(REG_INTER);
 2782 %}
 2783 
 2784 operand rbp_RegP()
 2785 %{
        // Pointer register operand allocated from the ptr_rbp_reg class.
        // An input declared as rbp_RegP accepts an ideal RegP or an rRegP operand.
 2786   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 2787   match(RegP);
 2788   match(rRegP);
 2789
 2790   format %{ %}
 2791   interface(REG_INTER);
 2792 %}
 2793 
 2794 // Used in rep stosq
 2795 operand rdi_RegP()
 2796 %{
        // Pointer register operand allocated from the ptr_rdi_reg class.
        // An input declared as rdi_RegP accepts an ideal RegP or an rRegP operand.
 2797   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 2798   match(RegP);
 2799   match(rRegP);
 2800
 2801   format %{ %}
 2802   interface(REG_INTER);
 2803 %}
 2804 
 2805 operand r15_RegP()
 2806 %{
        // Pointer register operand allocated from the ptr_r15_reg class
        // (r15 is described in this file as the read-only TLS register).
        // An input declared as r15_RegP accepts an ideal RegP or an rRegP operand.
 2807   constraint(ALLOC_IN_RC(ptr_r15_reg));
 2808   match(RegP);
 2809   match(rRegP);
 2810
 2811   format %{ %}
 2812   interface(REG_INTER);
 2813 %}
 2814 
 2815 operand rRegL()
 2816 %{
        // General long register operand allocated from the long_reg class.
        // As an input it accepts the ideal RegL and the rax/rdx pinned long
        // operands (rcx_RegL is not listed here).
 2817   constraint(ALLOC_IN_RC(long_reg));
 2818   match(RegL);
 2819   match(rax_RegL);
 2820   match(rdx_RegL);
 2821
 2822   format %{ %}
 2823   interface(REG_INTER);
 2824 %}
 2825 
 2826 // Special Registers
 2827 operand no_rax_rdx_RegL()
 2828 %{
        // Long register operand allocated from the long_no_rax_rdx_reg class.
        // An input of this type accepts an ideal RegL or an rRegL operand.
 2829   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 2830   match(RegL);
 2831   match(rRegL);
 2832
 2833   format %{ %}
 2834   interface(REG_INTER);
 2835 %}
 2836 
 2837 operand rax_RegL()
 2838 %{
        // Long register operand allocated from the long_rax_reg class.
        // An input declared as rax_RegL accepts an ideal RegL or an rRegL operand.
 2839   constraint(ALLOC_IN_RC(long_rax_reg));
 2840   match(RegL);
 2841   match(rRegL);
 2842
 2843   format %{ "RAX" %}  // printed as the fixed string "RAX"
 2844   interface(REG_INTER);
 2845 %}
 2846 
 2847 operand rcx_RegL()
 2848 %{
        // Long register operand allocated from the long_rcx_reg class.
        // An input declared as rcx_RegL accepts an ideal RegL or an rRegL operand.
 2849   constraint(ALLOC_IN_RC(long_rcx_reg));
 2850   match(RegL);
 2851   match(rRegL);
 2852
 2853   format %{ %}
 2854   interface(REG_INTER);
 2855 %}
 2856 
 2857 operand rdx_RegL()
 2858 %{
        // Long register operand allocated from the long_rdx_reg class.
        // An input declared as rdx_RegL accepts an ideal RegL or an rRegL operand.
 2859   constraint(ALLOC_IN_RC(long_rdx_reg));
 2860   match(RegL);
 2861   match(rRegL);
 2862
 2863   format %{ %}
 2864   interface(REG_INTER);
 2865 %}
 2866 
 2867 operand no_rbp_r13_RegL()
 2868 %{
        // Long register operand allocated from the long_no_rbp_r13_reg class.
        // As an input it accepts the ideal RegL, the general rRegL operand and
        // the rax/rcx/rdx pinned long operands.
 2869   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 2870   match(RegL);
 2871   match(rRegL);
 2872   match(rax_RegL);
 2873   match(rcx_RegL);
 2874   match(rdx_RegL);
 2875
 2876   format %{ %}
 2877   interface(REG_INTER);
 2878 %}
 2879 
 2880 // Flags register, used as output of compare instructions
 2881 operand rFlagsReg()
 2882 %{
        // Flags operand for the ideal RegFlags type, allocated from int_flags.
        // rFlagsRegU and rFlagsRegUCF use the same class and ideal type; they
        // differ only as operand types that instructions can name.
 2883   constraint(ALLOC_IN_RC(int_flags));
 2884   match(RegFlags);
 2885
 2886   format %{ "RFLAGS" %}
 2887   interface(REG_INTER);
 2888 %}
 2889 
 2890 // Flags register, used as output of FLOATING POINT compare instructions
 2891 operand rFlagsRegU()
 2892 %{
        // Flags operand for the ideal RegFlags type, allocated from int_flags.
        // Same register class as rFlagsReg; a distinct operand type so that
        // instructions can require this flavor of flags result.
 2893   constraint(ALLOC_IN_RC(int_flags));
 2894   match(RegFlags);
 2895
 2896   format %{ "RFLAGS_U" %}
 2897   interface(REG_INTER);
 2898 %}
 2899 
 2900 operand rFlagsRegUCF() %{
        // Flags operand for the ideal RegFlags type, allocated from int_flags.
        // NOTE(review): the constant-false predicate presumably keeps the
        // matcher from ever selecting this operand for a bare RegFlags, so it
        // only appears where an instruction names it explicitly - confirm.
 2901   constraint(ALLOC_IN_RC(int_flags));
 2902   match(RegFlags);
 2903   predicate(false);
 2904
 2905   format %{ "RFLAGS_U_CF" %}
 2906   interface(REG_INTER);
 2907 %}
 2908 
 2909 // Float register operands
 2910 operand regF() %{
         // Float operand for the ideal RegF type, allocated from float_reg.
 2911    constraint(ALLOC_IN_RC(float_reg));
 2912    match(RegF);
 2913
 2914    format %{ %}
 2915    interface(REG_INTER);
 2916 %}
 2917 
 2918 // Float register operands
 2919 operand legRegF() %{
         // Float operand for the ideal RegF type, allocated from the
         // float_reg_legacy class instead of float_reg.
 2920    constraint(ALLOC_IN_RC(float_reg_legacy));
 2921    match(RegF);
 2922
 2923    format %{ %}
 2924    interface(REG_INTER);
 2925 %}
 2926 
 2927 // Float register operands
 2928 operand vlRegF() %{
         // Float operand for the ideal RegF type, allocated from the
         // float_reg_vl class instead of float_reg.
 2929    constraint(ALLOC_IN_RC(float_reg_vl));
 2930    match(RegF);
 2931
 2932    format %{ %}
 2933    interface(REG_INTER);
 2934 %}
 2935 
 2936 // Double register operands
 2937 operand regD() %{
         // Double operand for the ideal RegD type, allocated from double_reg.
 2938    constraint(ALLOC_IN_RC(double_reg));
 2939    match(RegD);
 2940
 2941    format %{ %}
 2942    interface(REG_INTER);
 2943 %}
 2944 
 2945 // Double register operands
 2946 operand legRegD() %{
         // Double operand for the ideal RegD type, allocated from the
         // double_reg_legacy class instead of double_reg.
 2947    constraint(ALLOC_IN_RC(double_reg_legacy));
 2948    match(RegD);
 2949
 2950    format %{ %}
 2951    interface(REG_INTER);
 2952 %}
 2953 
 2954 // Double register operands
 2955 operand vlRegD() %{
         // Double operand for the ideal RegD type, allocated from the
         // double_reg_vl class instead of double_reg.
 2956    constraint(ALLOC_IN_RC(double_reg_vl));
 2957    match(RegD);
 2958
 2959    format %{ %}
 2960    interface(REG_INTER);
 2961 %}
 2962 
 2963 //----------Memory Operands----------------------------------------------------
 2964 // Direct Memory Operand
 2965 // operand direct(immP addr)
 2966 // %{
 2967 //   match(addr);
 2968 
 2969 //   format %{ "[$addr]" %}
 2970 //   interface(MEMORY_INTER) %{
 2971 //     base(0xFFFFFFFF);
 2972 //     index(0x4);
 2973 //     scale(0x0);
 2974 //     disp($addr);
 2975 //   %}
 2976 // %}
 2977 
 2978 // Indirect Memory Operand
 2979 operand indirect(any_RegP reg)
 2980 %{
        // Memory operand addressing [reg]: the pointer register itself is the
        // address, with no index, no scale and a zero displacement.
 2981   constraint(ALLOC_IN_RC(ptr_reg));
 2982   match(reg);
 2983
 2984   format %{ "[$reg]" %}
 2985   interface(MEMORY_INTER) %{
 2986     base($reg);
 2987     index(0x4);  // 0x4 is the "no index" encoding
 2988     scale(0x0);
 2989     disp(0x0);
 2990   %}
 2991 %}
 2992 
 2993 // Indirect Memory Plus Short Offset Operand
 2994 operand indOffset8(any_RegP reg, immL8 off)
 2995 %{
        // Memory operand addressing [reg + off] where the constant offset is an
        // immL8 operand; no index register is used.
 2996   constraint(ALLOC_IN_RC(ptr_reg));
 2997   match(AddP reg off);
 2998
 2999   format %{ "[$reg + $off (8-bit)]" %}
 3000   interface(MEMORY_INTER) %{
 3001     base($reg);
 3002     index(0x4);  // 0x4 is the "no index" encoding
 3003     scale(0x0);
 3004     disp($off);
 3005   %}
 3006 %}
 3007 
 3008 // Indirect Memory Plus Long Offset Operand
 3009 operand indOffset32(any_RegP reg, immL32 off)
 3010 %{
        // Memory operand addressing [reg + off] where the constant offset is an
        // immL32 operand; no index register is used.
 3011   constraint(ALLOC_IN_RC(ptr_reg));
 3012   match(AddP reg off);
 3013
 3014   format %{ "[$reg + $off (32-bit)]" %}
 3015   interface(MEMORY_INTER) %{
 3016     base($reg);
 3017     index(0x4);  // 0x4 is the "no index" encoding
 3018     scale(0x0);
 3019     disp($off);
 3020   %}
 3021 %}
 3022 
 3023 // Indirect Memory Plus Index Register Plus Offset Operand
 3024 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 3025 %{
        // Memory operand addressing [reg + lreg + off]: base register, unscaled
        // long index register and a 32-bit constant displacement.
 3026   constraint(ALLOC_IN_RC(ptr_reg));
 3027   match(AddP (AddP reg lreg) off);
 3028
 3029   op_cost(10);
 3030   format %{"[$reg + $off + $lreg]" %}
 3031   interface(MEMORY_INTER) %{
 3032     base($reg);
 3033     index($lreg);
 3034     scale(0x0);
 3035     disp($off);
 3036   %}
 3037 %}
 3038 
 3039 // Indirect Memory Plus Index Register Operand
 3040 operand indIndex(any_RegP reg, rRegL lreg)
 3041 %{
        // Memory operand addressing [reg + lreg]: base register plus unscaled
        // long index register, with a zero displacement.
 3042   constraint(ALLOC_IN_RC(ptr_reg));
 3043   match(AddP reg lreg);
 3044
 3045   op_cost(10);
 3046   format %{"[$reg + $lreg]" %}
 3047   interface(MEMORY_INTER) %{
 3048     base($reg);
 3049     index($lreg);
 3050     scale(0x0);
 3051     disp(0x0);
 3052   %}
 3053 %}
 3054 
 3055 // Indirect Memory Times Scale Plus Index Register
 3056 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 3057 %{
        // Memory operand addressing [reg + (lreg << scale)]: the constant left
        // shift of the long index is folded into the address as its scale.
 3058   constraint(ALLOC_IN_RC(ptr_reg));
 3059   match(AddP reg (LShiftL lreg scale));
 3060
 3061   op_cost(10);
 3062   format %{"[$reg + $lreg << $scale]" %}
 3063   interface(MEMORY_INTER) %{
 3064     base($reg);
 3065     index($lreg);
 3066     scale($scale);
 3067     disp(0x0);
 3068   %}
 3069 %}
 3070 
 3071 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 3072 %{
        // Memory operand addressing [reg + (idx << scale)] where idx is an int
        // register that the ideal graph sign-extends with ConvI2L.
        // The int register is used directly as the index.
 3073   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the long type reached through in(3)->in(1) has a
        // lower bound >= 0 (presumably the ConvI2L node - TODO confirm input
        // numbering), i.e. the index is known to be non-negative.
 3074   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3075   match(AddP reg (LShiftL (ConvI2L idx) scale));
 3076
 3077   op_cost(10);
 3078   format %{"[$reg + pos $idx << $scale]" %}
 3079   interface(MEMORY_INTER) %{
 3080     base($reg);
 3081     index($idx);
 3082     scale($scale);
 3083     disp(0x0);
 3084   %}
 3085 %}
 3086 
 3087 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 3088 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 3089 %{
        // Memory operand addressing [reg + (lreg << scale) + off]: base
        // register, scaled long index register and 32-bit constant displacement.
 3090   constraint(ALLOC_IN_RC(ptr_reg));
 3091   match(AddP (AddP reg (LShiftL lreg scale)) off);
 3092
 3093   op_cost(10);
 3094   format %{"[$reg + $off + $lreg << $scale]" %}
 3095   interface(MEMORY_INTER) %{
 3096     base($reg);
 3097     index($lreg);
 3098     scale($scale);
 3099     disp($off);
 3100   %}
 3101 %}
 3102 
 3103 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 3104 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 3105 %{
        // Memory operand addressing [reg + idx + off] where idx is an int
        // register that the ideal graph sign-extends with ConvI2L.
        // The int register is used directly as the unscaled index.
 3106   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the long type reached through in(2)->in(3) has a
        // lower bound >= 0 (presumably the ConvI2L node - TODO confirm input
        // numbering), i.e. the index is known to be non-negative.
 3107   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 3108   match(AddP (AddP reg (ConvI2L idx)) off);
 3109
 3110   op_cost(10);
 3111   format %{"[$reg + $off + $idx]" %}
 3112   interface(MEMORY_INTER) %{
 3113     base($reg);
 3114     index($idx);
 3115     scale(0x0);
 3116     disp($off);
 3117   %}
 3118 %}
 3119 
 3120 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3121 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3122 %{
        // Memory operand addressing [reg + (idx << scale) + off] where idx is
        // an int register that the ideal graph sign-extends with ConvI2L.
        // The int register is used directly as the scaled index.
 3123   constraint(ALLOC_IN_RC(ptr_reg));
        // Only applies when the long type reached through in(2)->in(3)->in(1)
        // has a lower bound >= 0 (presumably the ConvI2L node - TODO confirm
        // input numbering), i.e. the index is known to be non-negative.
 3124   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3125   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 3126
 3127   op_cost(10);
 3128   format %{"[$reg + $off + $idx << $scale]" %}
 3129   interface(MEMORY_INTER) %{
 3130     base($reg);
 3131     index($idx);
 3132     scale($scale);
 3133     disp($off);
 3134   %}
 3135 %}
 3136 
 3137 // Indirect Narrow Oop Operand
 3138 operand indCompressedOop(rRegN reg) %{
        // Memory operand that folds a DecodeN of a narrow register into the
        // address as [R12 + reg << 3], with a zero displacement.
        // Only applies when compressed oops are in use and the compressed-oop
        // shift equals Address::times_8, matching the fixed scale(0x3) below.
 3139   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3140   constraint(ALLOC_IN_RC(ptr_reg));
 3141   match(DecodeN reg);
 3142
 3143   op_cost(10);
 3144   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 3145   interface(MEMORY_INTER) %{
 3146     base(0xc); // R12
 3147     index($reg);
 3148     scale(0x3);
 3149     disp(0x0);
 3150   %}
 3151 %}
 3152 
 3153 // Indirect Narrow Oop Plus Offset Operand
 3154 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
 3155 // so we can't free r12 even with CompressedOops::base() == nullptr.
 3156 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
        // Memory operand that folds a DecodeN of a narrow register plus a
        // constant offset into the address as [R12 + reg << 3 + off].
        // Only applies when compressed oops are in use and the compressed-oop
        // shift equals Address::times_8, matching the fixed scale(0x3) below.
 3157   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 3158   constraint(ALLOC_IN_RC(ptr_reg));
 3159   match(AddP (DecodeN reg) off);
 3160
 3161   op_cost(10);
 3162   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 3163   interface(MEMORY_INTER) %{
 3164     base(0xc); // R12
 3165     index($reg);
 3166     scale(0x3);
 3167     disp($off);
 3168   %}
 3169 %}
 3170 
 3171 // Indirect Memory Operand
 3172 operand indirectNarrow(rRegN reg)
 3173 %{
        // Narrow-oop variant of indirect: addresses [reg] using the narrow
        // register itself as the base. Only applies when the compressed-oop
        // shift is 0, so the DecodeN is folded away.
 3174   predicate(CompressedOops::shift() == 0);
 3175   constraint(ALLOC_IN_RC(ptr_reg));
 3176   match(DecodeN reg);
 3177
 3178   format %{ "[$reg]" %}
 3179   interface(MEMORY_INTER) %{
 3180     base($reg);
 3181     index(0x4);  // 0x4 is the "no index" encoding
 3182     scale(0x0);
 3183     disp(0x0);
 3184   %}
 3185 %}
 3186 
 3187 // Indirect Memory Plus Short Offset Operand
 3188 operand indOffset8Narrow(rRegN reg, immL8 off)
 3189 %{
        // Narrow-oop variant of indOffset8: addresses [reg + off] using the
        // narrow register itself as the base. Only applies when the
        // compressed-oop shift is 0, so the DecodeN is folded away.
 3190   predicate(CompressedOops::shift() == 0);
 3191   constraint(ALLOC_IN_RC(ptr_reg));
 3192   match(AddP (DecodeN reg) off);
 3193
 3194   format %{ "[$reg + $off (8-bit)]" %}
 3195   interface(MEMORY_INTER) %{
 3196     base($reg);
 3197     index(0x4);  // 0x4 is the "no index" encoding
 3198     scale(0x0);
 3199     disp($off);
 3200   %}
 3201 %}
 3202 
 3203 // Indirect Memory Plus Long Offset Operand
 3204 operand indOffset32Narrow(rRegN reg, immL32 off)
 3205 %{
        // Narrow-oop variant of indOffset32: addresses [reg + off] using the
        // narrow register itself as the base. Only applies when the
        // compressed-oop shift is 0, so the DecodeN is folded away.
 3206   predicate(CompressedOops::shift() == 0);
 3207   constraint(ALLOC_IN_RC(ptr_reg));
 3208   match(AddP (DecodeN reg) off);
 3209
 3210   format %{ "[$reg + $off (32-bit)]" %}
 3211   interface(MEMORY_INTER) %{
 3212     base($reg);
 3213     index(0x4);  // 0x4 is the "no index" encoding
 3214     scale(0x0);
 3215     disp($off);
 3216   %}
 3217 %}
 3218 
 3219 // Indirect Memory Plus Index Register Plus Offset Operand
 3220 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 3221 %{
        // Narrow-oop variant of indIndexOffset: addresses [reg + lreg + off]
        // using the narrow register itself as the base. Only applies when the
        // compressed-oop shift is 0, so the DecodeN is folded away.
 3222   predicate(CompressedOops::shift() == 0);
 3223   constraint(ALLOC_IN_RC(ptr_reg));
 3224   match(AddP (AddP (DecodeN reg) lreg) off);
 3225
 3226   op_cost(10);
 3227   format %{"[$reg + $off + $lreg]" %}
 3228   interface(MEMORY_INTER) %{
 3229     base($reg);
 3230     index($lreg);
 3231     scale(0x0);
 3232     disp($off);
 3233   %}
 3234 %}
 3235 
 3236 // Indirect Memory Plus Index Register Operand
 3237 operand indIndexNarrow(rRegN reg, rRegL lreg)
 3238 %{
        // Narrow-oop variant of indIndex: addresses [reg + lreg] using the
        // narrow register itself as the base. Only applies when the
        // compressed-oop shift is 0, so the DecodeN is folded away.
 3239   predicate(CompressedOops::shift() == 0);
 3240   constraint(ALLOC_IN_RC(ptr_reg));
 3241   match(AddP (DecodeN reg) lreg);
 3242
 3243   op_cost(10);
 3244   format %{"[$reg + $lreg]" %}
 3245   interface(MEMORY_INTER) %{
 3246     base($reg);
 3247     index($lreg);
 3248     scale(0x0);
 3249     disp(0x0);
 3250   %}
 3251 %}
 3252 
 3253 // Indirect Memory Times Scale Plus Index Register
 3254 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 3255 %{
        // Narrow-oop variant of indIndexScale: addresses [reg + (lreg << scale)]
        // using the narrow register itself as the base. Only applies when the
        // compressed-oop shift is 0, so the DecodeN is folded away.
 3256   predicate(CompressedOops::shift() == 0);
 3257   constraint(ALLOC_IN_RC(ptr_reg));
 3258   match(AddP (DecodeN reg) (LShiftL lreg scale));
 3259
 3260   op_cost(10);
 3261   format %{"[$reg + $lreg << $scale]" %}
 3262   interface(MEMORY_INTER) %{
 3263     base($reg);
 3264     index($lreg);
 3265     scale($scale);
 3266     disp(0x0);
 3267   %}
 3268 %}
 3269 
 3270 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 3271 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 3272 %{
        // Narrow-oop variant of indIndexScaleOffset: addresses
        // [reg + (lreg << scale) + off] using the narrow register itself as the
        // base. Only applies when the compressed-oop shift is 0.
 3273   predicate(CompressedOops::shift() == 0);
 3274   constraint(ALLOC_IN_RC(ptr_reg));
 3275   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 3276
 3277   op_cost(10);
 3278   format %{"[$reg + $off + $lreg << $scale]" %}
 3279   interface(MEMORY_INTER) %{
 3280     base($reg);
 3281     index($lreg);
 3282     scale($scale);
 3283     disp($off);
 3284   %}
 3285 %}
 3286 
 3287 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 3288 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 3289 %{
        // Narrow-oop variant of indPosIndexOffset: addresses [reg + idx + off]
        // using the narrow register itself as the base and the int register
        // directly as the unscaled index.
 3290   constraint(ALLOC_IN_RC(ptr_reg));
        // Requires a compressed-oop shift of 0 and a lower bound >= 0 on the
        // long type reached through in(2)->in(3) (presumably the ConvI2L node -
        // TODO confirm input numbering), i.e. a non-negative index.
 3291   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 3292   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 3293
 3294   op_cost(10);
 3295   format %{"[$reg + $off + $idx]" %}
 3296   interface(MEMORY_INTER) %{
 3297     base($reg);
 3298     index($idx);
 3299     scale(0x0);
 3300     disp($off);
 3301   %}
 3302 %}
 3303 
 3304 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 3305 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 3306 %{
        // Narrow-oop variant of indPosIndexScaleOffset: addresses
        // [reg + (idx << scale) + off] using the narrow register itself as the
        // base and the int register directly as the scaled index.
 3307   constraint(ALLOC_IN_RC(ptr_reg));
        // Requires a compressed-oop shift of 0 and a lower bound >= 0 on the
        // long type reached through in(2)->in(3)->in(1) (presumably the ConvI2L
        // node - TODO confirm input numbering), i.e. a non-negative index.
 3308   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3309   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 3310
 3311   op_cost(10);
 3312   format %{"[$reg + $off + $idx << $scale]" %}
 3313   interface(MEMORY_INTER) %{
 3314     base($reg);
 3315     index($idx);
 3316     scale($scale);
 3317     disp($off);
 3318   %}
 3319 %}
 3320 
 3321 //----------Special Memory Operands--------------------------------------------
 3322 // Stack Slot Operand - This operand is used for loading and storing temporary
 3323 //                      values on the stack where a match requires a value to
 3324 //                      flow through memory.
 3325 operand stackSlotP(sRegP reg)
 3326 %{
        // Stack-slot memory operand for an sRegP value, allocated from the
        // stack_slots class and addressed as an RSP-relative location whose
        // displacement is the slot's stack offset.
 3327   constraint(ALLOC_IN_RC(stack_slots));
 3328   // No match rule because this operand is only generated in matching
 3329
 3330   format %{ "[$reg]" %}
 3331   interface(MEMORY_INTER) %{
 3332     base(0x4);   // RSP
 3333     index(0x4);  // No Index
 3334     scale(0x0);  // No Scale
 3335     disp($reg);  // Stack Offset
 3336   %}
 3337 %}
 3338 
 3339 operand stackSlotI(sRegI reg)
 3340 %{
        // Stack-slot memory operand for an sRegI value, allocated from the
        // stack_slots class and addressed as an RSP-relative location whose
        // displacement is the slot's stack offset.
 3341   constraint(ALLOC_IN_RC(stack_slots));
 3342   // No match rule because this operand is only generated in matching
 3343
 3344   format %{ "[$reg]" %}
 3345   interface(MEMORY_INTER) %{
 3346     base(0x4);   // RSP
 3347     index(0x4);  // No Index
 3348     scale(0x0);  // No Scale
 3349     disp($reg);  // Stack Offset
 3350   %}
 3351 %}
 3352 
 3353 operand stackSlotF(sRegF reg)
 3354 %{
        // Stack-slot memory operand for an sRegF value, allocated from the
        // stack_slots class and addressed as an RSP-relative location whose
        // displacement is the slot's stack offset.
 3355   constraint(ALLOC_IN_RC(stack_slots));
 3356   // No match rule because this operand is only generated in matching
 3357
 3358   format %{ "[$reg]" %}
 3359   interface(MEMORY_INTER) %{
 3360     base(0x4);   // RSP
 3361     index(0x4);  // No Index
 3362     scale(0x0);  // No Scale
 3363     disp($reg);  // Stack Offset
 3364   %}
 3365 %}
 3366 
 3367 operand stackSlotD(sRegD reg)
 3368 %{
        // Stack-slot memory operand for an sRegD value, allocated from the
        // stack_slots class and addressed as an RSP-relative location whose
        // displacement is the slot's stack offset.
 3369   constraint(ALLOC_IN_RC(stack_slots));
 3370   // No match rule because this operand is only generated in matching
 3371
 3372   format %{ "[$reg]" %}
 3373   interface(MEMORY_INTER) %{
 3374     base(0x4);   // RSP
 3375     index(0x4);  // No Index
 3376     scale(0x0);  // No Scale
 3377     disp($reg);  // Stack Offset
 3378   %}
 3379 %}
 3380 operand stackSlotL(sRegL reg)
 3381 %{
        // Stack-slot memory operand for an sRegL value, allocated from the
        // stack_slots class and addressed as an RSP-relative location whose
        // displacement is the slot's stack offset.
 3382   constraint(ALLOC_IN_RC(stack_slots));
 3383   // No match rule because this operand is only generated in matching
 3384
 3385   format %{ "[$reg]" %}
 3386   interface(MEMORY_INTER) %{
 3387     base(0x4);   // RSP
 3388     index(0x4);  // No Index
 3389     scale(0x0);  // No Scale
 3390     disp($reg);  // Stack Offset
 3391   %}
 3392 %}
 3393 
 3394 //----------Conditional Branch Operands----------------------------------------
 3395 // Comparison Op  - This is the operation of the comparison, and is limited to
 3396 //                  the following set of codes:
 3397 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 3398 //
 3399 // Other attributes of the comparison, such as unsignedness, are specified
 3400 // by the comparison instruction that sets a condition code flags register.
 3401 // That result is represented by a flags operand whose subtype is appropriate
 3402 // to the unsignedness (etc.) of the comparison.
 3403 //
 3404 // Later, the instruction which matches both the Comparison Op (a Bool) and
 3405 // the flags (produced by the Cmp) specifies the coding of the comparison op
 3406 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 3407 
 3408 // Comparison Code
 3409 operand cmpOp()
 3410 %{
        // Comparison operand matching any Bool, with signed condition names
        // (l, ge, le, g). Each COND_INTER entry pairs a numeric code with the
        // condition name shown as a string.
 3411   match(Bool);
 3412
 3413   format %{ "" %}
 3414   interface(COND_INTER) %{
 3415     equal(0x4, "e");
 3416     not_equal(0x5, "ne");
 3417     less(0xC, "l");
 3418     greater_equal(0xD, "ge");
 3419     less_equal(0xE, "le");
 3420     greater(0xF, "g");
 3421     overflow(0x0, "o");
 3422     no_overflow(0x1, "no");
 3423   %}
 3424 %}
 3425 
 3426 // Comparison Code, unsigned compare.  Used by FP also, with
 3427 // C2 (unordered) turned into GT or LT already.  The other bits
 3428 // C0 and C3 are turned into Carry & Zero flags.
 3429 operand cmpOpU()
 3430 %{
        // Comparison operand matching any Bool, with unsigned condition names
        // (b, ae, be, a) for the ordering tests. The equal, not_equal,
        // overflow and no_overflow entries are the same as in cmpOp.
 3431   match(Bool);
 3432
 3433   format %{ "" %}
 3434   interface(COND_INTER) %{
 3435     equal(0x4, "e");
 3436     not_equal(0x5, "ne");
 3437     less(0x2, "b");
 3438     greater_equal(0x3, "ae");
 3439     less_equal(0x6, "be");
 3440     greater(0x7, "a");
 3441     overflow(0x0, "o");
 3442     no_overflow(0x1, "no");
 3443   %}
 3444 %}
 3445 
 3446 
 3447 // Floating comparisons that don't require any fixup for the unordered case.
 3448 // If both inputs of the comparison are the same, ZF is always set so we
 3449 // don't need to use cmpOpUCF2 for eq/ne.
 3450 operand cmpOpUCF() %{
        // Comparison operand for Bool nodes whose test is lt, ge, le or gt, or
        // whose compare node (n->in(1)) has two identical inputs.
        // Unlike cmpOpU, equal and not_equal map to the parity conditions
        // "np" and "p".
 3451   match(Bool);
 3452   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 3453             n->as_Bool()->_test._test == BoolTest::ge ||
 3454             n->as_Bool()->_test._test == BoolTest::le ||
 3455             n->as_Bool()->_test._test == BoolTest::gt ||
 3456             n->in(1)->in(1) == n->in(1)->in(2));
 3457   format %{ "" %}
 3458   interface(COND_INTER) %{
 3459     equal(0xb, "np");
 3460     not_equal(0xa, "p");
 3461     less(0x2, "b");
 3462     greater_equal(0x3, "ae");
 3463     less_equal(0x6, "be");
 3464     greater(0x7, "a");
 3465     overflow(0x0, "o");
 3466     no_overflow(0x1, "no");
 3467   %}
 3468 %}
 3469 
 3470 
 3471 // Floating comparisons that can be fixed up with extra conditional jumps
 3472 operand cmpOpUCF2() %{
        // Comparison operand for Bool nodes whose test is ne or eq and whose
        // compare node (n->in(1)) has two different inputs. This is the
        // complement of the cmpOpUCF predicate for eq/ne tests.
        // The condition table is the same as cmpOpU.
 3473   match(Bool);
 3474   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 3475              n->as_Bool()->_test._test == BoolTest::eq) &&
 3476             n->in(1)->in(1) != n->in(1)->in(2));
 3477   format %{ "" %}
 3478   interface(COND_INTER) %{
 3479     equal(0x4, "e");
 3480     not_equal(0x5, "ne");
 3481     less(0x2, "b");
 3482     greater_equal(0x3, "ae");
 3483     less_equal(0x6, "be");
 3484     greater(0x7, "a");
 3485     overflow(0x0, "o");
 3486     no_overflow(0x1, "no");
 3487   %}
 3488 %}
 3489 
 3490 //----------OPERAND CLASSES----------------------------------------------------
 3491 // Operand Classes are groups of operands that are used to simplify
 3492 // instruction definitions by not requiring the AD writer to specify separate
 3493 // instructions for every form of operand when the instruction accepts
 3494 // multiple operand types with the same basic encoding and format.  The classic
 3495 // case of this is memory operands.
 3496 
 3497 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,  // full-width pointer base forms
 3498                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 3499                indCompressedOop, indCompressedOopOffset,  // R12-based compressed oop forms (shift == times_8)
 3500                indirectNarrow, indOffset8Narrow, indOffset32Narrow,  // narrow base forms (shift == 0)
 3501                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 3502                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 3503 
 3504 //----------PIPELINE-----------------------------------------------------------
 3505 // Rules which define the behavior of the target architecture's pipeline.
 3506 pipeline %{
 3507 
 3508 //----------ATTRIBUTES---------------------------------------------------------
 3509 attributes %{
        // Global pipeline attributes: instruction sizing, bundle width, fetch
        // unit geometry and the nop instruction list.
 3510   variable_size_instructions;        // Variable size instructions
 3511   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 3512   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 3513   instruction_fetch_unit_size = 16;  // The processor fetches one line
 3514   instruction_fetch_units = 1;       // of 16 bytes
 3515
 3516   // List of nop instructions
 3517   nops( MachNop );
 3518 %}
 3519 
 3520 //----------RESOURCES----------------------------------------------------------
 3521 // Resources are the functional units available to the machine
 3522 
 3523 // Generic P2/P3 pipeline
 3524 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 3525 // 3 instructions decoded per cycle.
 3526 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 3527 // 3 ALU op, only ALU0 handles mul instructions.
 3528 resources( D0, D1, D2, DECODE = D0 | D1 | D2,    // decoders; DECODE means any one of them
 3529            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2, // memory units; MEM means any one of them
 3530            BR, FPU,                              // single branch unit and single FPU
 3531            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);  // ALUs; ALU means any one of them
 3532 
 3533 //----------PIPELINE DESCRIPTION-----------------------------------------------
 3534 // Pipeline Description specifies the stages in the machine's pipeline
 3535 
 3536 // Generic P2/P3 pipeline
 3537 pipe_desc(S0, S1, S2, S3, S4, S5);  // the six stage names referenced by the pipe classes
 3538 
 3539 //----------PIPELINE CLASSES---------------------------------------------------
 3540 // Pipeline Classes describe the stages in which input and output are
 3541 // referenced by the hardware pipeline.
 3542 
 3543 // Naming convention: ialu or fpu
 3544 // Then: _reg
 3545 // Then: _reg if there is a 2nd register
 3546 // Then: _long if it's a pair of instructions implementing a long
 3547 // Then: _fat if it requires the big decoder
 3548 //   Or: _mem if it requires the big decoder and a memory unit.
 3549 
 3550 // Integer ALU reg operation
 3551 pipe_class ialu_reg(rRegI dst)
 3552 %{
          // Single-instruction integer op on one register: dst is read in S3
          // and written in S4, using any decoder and any ALU.
 3553     single_instruction;
 3554     dst    : S4(write);
 3555     dst    : S3(read);
 3556     DECODE : S0;        // any decoder
 3557     ALU    : S3;        // any alu
 3558 %}
 3559 
 3560 // Long ALU reg operation
 3561 pipe_class ialu_reg_long(rRegL dst)
 3562 %{
          // Two-instruction long op on one register: dst is read in S3 and
          // written in S4, with a count of 2 on both DECODE and ALU.
 3563     instruction_count(2);
 3564     dst    : S4(write);
 3565     dst    : S3(read);
 3566     DECODE : S0(2);     // any 2 decoders
 3567     ALU    : S3(2);     // any 2 alus
 3568 %}
 3569 
 3570 // Integer ALU reg operation using big decoder
 3571 pipe_class ialu_reg_fat(rRegI dst)
 3572 %{
          // Same as ialu_reg except that it must decode on D0, the big decoder.
 3573     single_instruction;
 3574     dst    : S4(write);
 3575     dst    : S3(read);
 3576     D0     : S0;        // big decoder only
 3577     ALU    : S3;        // any alu
 3578 %}
 3579 
 3580 // Integer ALU reg-reg operation
 3581 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 3582 %{
          // Single-instruction integer op: src is read in S3, dst is written in
          // S4, using any decoder and any ALU.
 3583     single_instruction;
 3584     dst    : S4(write);
 3585     src    : S3(read);
 3586     DECODE : S0;        // any decoder
 3587     ALU    : S3;        // any alu
 3588 %}
 3589 
 3590 // Integer ALU reg-reg operation using big decoder
 3591 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 3592 %{
          // Like ialu_reg_reg but decoded on D0 only. Note that src is typed as
          // a memory operand here, yet no MEM resource is reserved.
 3593     single_instruction;
 3594     dst    : S4(write);
 3595     src    : S3(read);
 3596     D0     : S0;        // big decoder only
 3597     ALU    : S3;        // any alu
 3598 %}
 3599 
 3600 // Integer ALU reg-mem operation
 3601 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 3602 %{
          // Single-instruction integer op with a memory source: mem is read in
          // S3 on a memory unit, the ALU is used in S4 and dst is written in S5.
 3603     single_instruction;
 3604     dst    : S5(write);
 3605     mem    : S3(read);
 3606     D0     : S0;        // big decoder only
 3607     ALU    : S4;        // any alu
 3608     MEM    : S3;        // any mem
 3609 %}
 3610 
 3611 // Integer mem operation (prefetch)
 3612 pipe_class ialu_mem(memory mem)
 3613 %{
          // Single-instruction memory-only op: mem is read in S3 on a memory
          // unit; no ALU is reserved and no register is written.
 3614     single_instruction;
 3615     mem    : S3(read);
 3616     D0     : S0;        // big decoder only
 3617     MEM    : S3;        // any mem
 3618 %}
 3619 
 3620 // Integer Store to Memory
 3621 pipe_class ialu_mem_reg(memory mem, rRegI src)
 3622 %{
          // Single-instruction store of a register: the mem operand is read in
          // S3, the src register in S5; uses D0, an ALU in S4 and a memory unit
          // in S3.
 3623     single_instruction;
 3624     mem    : S3(read);
 3625     src    : S5(read);
 3626     D0     : S0;        // big decoder only
 3627     ALU    : S4;        // any alu
 3628     MEM    : S3;
 3629 %}
 3630 
 3631 // // Long Store to Memory
 3632 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 3633 // %{
 3634 //     instruction_count(2);
 3635 //     mem    : S3(read);
 3636 //     src    : S5(read);
 3637 //     D0     : S0(2);          // big decoder only; twice
 3638 //     ALU    : S4(2);     // any 2 alus
 3639 //     MEM    : S3(2);  // Both mems
 3640 // %}
 3641 
 3642 // Integer Store to Memory
 3643 pipe_class ialu_mem_imm(memory mem)
 3644 %{
          // Single-instruction store with no register source: the mem operand
          // is read in S3; uses D0, an ALU in S4 and a memory unit in S3.
 3645     single_instruction;
 3646     mem    : S3(read);
 3647     D0     : S0;        // big decoder only
 3648     ALU    : S4;        // any alu
 3649     MEM    : S3;
 3650 %}
 3651 
 3652 // Integer ALU0 reg-reg operation
 3653 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 3654 %{
          // Reg-reg integer op restricted to ALU0 and the big decoder: src is
          // read in S3 and dst is written in S4.
 3655     single_instruction;
 3656     dst    : S4(write);
 3657     src    : S3(read);
 3658     D0     : S0;        // Big decoder only
 3659     ALU0   : S3;        // only alu0
 3660 %}
 3661 
 3662 // Integer ALU0 reg-mem operation
 3663 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 3664 %{
          // Reg-mem integer op restricted to ALU0: mem is read in S3 on a
          // memory unit, ALU0 is used in S4 and dst is written in S5.
 3665     single_instruction;
 3666     dst    : S5(write);
 3667     mem    : S3(read);
 3668     D0     : S0;        // big decoder only
 3669     ALU0   : S4;        // ALU0 only
 3670     MEM    : S3;        // any mem
 3671 %}
 3672 
 3673 // Integer ALU reg-reg operation
 3674 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 3675 %{
          // Single-instruction op that reads two registers in S3 and writes the
          // flags operand cr in S4, using any decoder and any ALU.
 3676     single_instruction;
 3677     cr     : S4(write);
 3678     src1   : S3(read);
 3679     src2   : S3(read);
 3680     DECODE : S0;        // any decoder
 3681     ALU    : S3;        // any alu
 3682 %}
 3683 
 3684 // Integer ALU reg-imm operation
 3685 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 3686 %{
          // Single-instruction op that reads one register in S3 and writes the
          // flags operand cr in S4, using any decoder and any ALU.
 3687     single_instruction;
 3688     cr     : S4(write);
 3689     src1   : S3(read);
 3690     DECODE : S0;        // any decoder
 3691     ALU    : S3;        // any alu
 3692 %}
 3693 
 3694 // Integer ALU reg-mem operation
 3695 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 3696 %{
          // Single-instruction op that reads a register and a memory operand in
          // S3 and writes the flags operand cr in S4; uses D0, an ALU in S4 and
          // a memory unit in S3.
 3697     single_instruction;
 3698     cr     : S4(write);
 3699     src1   : S3(read);
 3700     src2   : S3(read);
 3701     D0     : S0;        // big decoder only
 3702     ALU    : S4;        // any alu
 3703     MEM    : S3;
 3704 %}
 3705 
 3706 // Conditional move reg-reg
 3707 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 3708 %{
          // Four-instruction sequence: p and q are read in S3 and y in S4.
          // Only decoder usage is modelled; no ALU resource and no written
          // operand is declared.
 3709     instruction_count(4);
 3710     y      : S4(read);
 3711     q      : S3(read);
 3712     p      : S3(read);
 3713     DECODE : S0(4);     // any decoder
 3714 %}
 3715 
 3716 // Conditional move reg-reg
 3717 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 3718 %{
          // Single-instruction conditional move: src and the flags operand cr
          // are read in S3 and dst is written in S4. Only a decoder is reserved.
 3719     single_instruction;
 3720     dst    : S4(write);
 3721     src    : S3(read);
 3722     cr     : S3(read);
 3723     DECODE : S0;        // any decoder
 3724 %}
 3725 
 3726 // Conditional move reg-mem
 3727 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 3728 %{
          // Single-instruction conditional move from memory: src and the flags
          // operand cr are read in S3 and dst is written in S4; a memory unit
          // is used in S3. Parameter order (cr first) differs from pipe_cmov_reg.
 3729     single_instruction;
 3730     dst    : S4(write);
 3731     src    : S3(read);
 3732     cr     : S3(read);
 3733     DECODE : S0;        // any decoder
 3734     MEM    : S3;
 3735 %}
 3736 
 3737 // Conditional move reg-reg long:
      // same operand timing as pipe_cmov_reg (src and cr read in S3, dst written
      // in S4) but on rRegL operands and occupying two decoders.
 3738 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 3739 %{
 3740     single_instruction;
 3741     dst    : S4(write);
 3742     src    : S3(read);
 3743     cr     : S3(read);
 3744     DECODE : S0(2);     // any 2 decoders
 3745 %}
 3746 
 3747 // Float single-register operation (two instructions):
      // the only operand, dst, is recorded as a read in S3; no write is listed.
 3748 pipe_class fpu_reg(regD dst)
 3749 %{
 3750     instruction_count(2);
 3751     dst    : S3(read);
 3752     DECODE : S0(2);     // any 2 decoders
 3753     FPU    : S3;
 3754 %}
 3755 
 3756 // Float reg-reg operation (two instructions):
      // src is read in S3, the FPU is used in S3 and dst is written in S4.
 3757 pipe_class fpu_reg_reg(regD dst, regD src)
 3758 %{
 3759     instruction_count(2);
 3760     dst    : S4(write);
 3761     src    : S3(read);
 3762     DECODE : S0(2);     // any 2 decoders
 3763     FPU    : S3;
 3764 %}
 3765 
 3766 // Float reg-reg-reg operation (three instructions):
      // src1 and src2 are read in S3 and dst is written in S4.
 3767 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 3768 %{
 3769     instruction_count(3);
 3770     dst    : S4(write);
 3771     src1   : S3(read);
 3772     src2   : S3(read);
 3773     DECODE : S0(3);     // any 3 decoders
 3774     FPU    : S3(2);     // FPU with a count of 2
 3775 %}
 3776 
 3777 // Float operation with three register sources (four instructions):
      // src1, src2 and src3 are read in S3 and dst is written in S4.
 3778 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 3779 %{
 3780     instruction_count(4);
 3781     dst    : S4(write);
 3782     src1   : S3(read);
 3783     src2   : S3(read);
 3784     src3   : S3(read);
 3785     DECODE : S0(4);     // any 4 decoders
 3786     FPU    : S3(2);     // FPU with a count of 2
 3787 %}
 3788 
 3789 // Float operation with one memory source and two register sources (four instructions):
      // src1 (memory), src2 and src3 are read in S3 and dst is written in S4.
      // The big decoder is used in S0 and three more decoders in S1.
 3790 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 3791 %{
 3792     instruction_count(4);
 3793     dst    : S4(write);
 3794     src1   : S3(read);
 3795     src2   : S3(read);
 3796     src3   : S3(read);
 3797     DECODE : S1(3);     // any 3 decoders
 3798     D0     : S0;        // Big decoder only
 3799     FPU    : S3(2);
 3800     MEM    : S3;
 3801 %}
 3802 
 3803 // Float reg-mem operation (two instructions):
      // mem is read in S3, the FPU is used in S4 and dst is written in S5.
 3804 pipe_class fpu_reg_mem(regD dst, memory mem)
 3805 %{
 3806     instruction_count(2);
 3807     dst    : S5(write);
 3808     mem    : S3(read);
 3809     D0     : S0;        // big decoder only
 3810     DECODE : S1;        // any decoder for FPU POP
 3811     FPU    : S4;
 3812     MEM    : S3;        // any mem
 3813 %}
 3814 
 3815 // Float reg-reg-mem operation (three instructions):
      // src1 and mem are read in S3, the FPU is used in S4 and dst is written in S5.
 3816 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 3817 %{
 3818     instruction_count(3);
 3819     dst    : S5(write);
 3820     src1   : S3(read);
 3821     mem    : S3(read);
 3822     D0     : S0;        // big decoder only
 3823     DECODE : S1(2);     // any decoder for FPU POP
 3824     FPU    : S4;
 3825     MEM    : S3;        // any mem
 3826 %}
 3827 
 3828 // Float mem-reg operation (two instructions):
      // mem is read in S3 and src in S5; both operands are recorded as reads and
      // no write is listed. Here any decoder is used in S0 and the big decoder in
      // S1, the reverse of the order used by fpu_reg_mem.
 3829 pipe_class fpu_mem_reg(memory mem, regD src)
 3830 %{
 3831     instruction_count(2);
 3832     src    : S5(read);
 3833     mem    : S3(read);
 3834     DECODE : S0;        // any decoder for FPU PUSH
 3835     D0     : S1;        // big decoder only
 3836     FPU    : S4;
 3837     MEM    : S3;        // any mem
 3838 %}
 3839 
 3840 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 3841 %{
          // Float operation with a memory operand and two register sources
          // (three instructions): src1, src2 and mem are all read in S3; no
          // operand is recorded as written.
 3842     instruction_count(3);
 3843     src1   : S3(read);
 3844     src2   : S3(read);
 3845     mem    : S3(read);
 3846     DECODE : S0(2);     // any decoder for FPU PUSH
 3847     D0     : S1;        // big decoder only
 3848     FPU    : S4;
 3849     MEM    : S3;        // any mem
 3850 %}
 3851 
 3852 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 3853 %{
          // Float operation with two memory operands and one register source
          // (three instructions): src1 and src2 are read in S3, mem in S4; the
          // memory unit is requested with a count of 2.
 3854     instruction_count(3);
 3855     src1   : S3(read);
 3856     src2   : S3(read);
 3857     mem    : S4(read);
 3858     DECODE : S0;        // any decoder for FPU PUSH
 3859     D0     : S0(2);     // big decoder only
 3860     FPU    : S4;
 3861     MEM    : S3(2);     // any mem
 3862 %}
 3863 
 3864 pipe_class fpu_mem_mem(memory dst, memory src1)
 3865 %{
          // Memory-to-memory float operation (two instructions): src1 is read
          // in S3 and dst is recorded as a read in S4. No FPU resource is
          // listed, only the big decoder and the memory unit.
 3866     instruction_count(2);
 3867     src1   : S3(read);
 3868     dst    : S4(read);
 3869     D0     : S0(2);     // big decoder only
 3870     MEM    : S3(2);     // any mem
 3871 %}
 3872 
 3873 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 3874 %{
          // Float operation on three memory operands (three instructions):
          // src1 and src2 are read in S3 and dst is recorded as a read in S4.
 3875     instruction_count(3);
 3876     src1   : S3(read);
 3877     src2   : S3(read);
 3878     dst    : S4(read);
 3879     D0     : S0(3);     // big decoder only
 3880     FPU    : S4;
 3881     MEM    : S3(3);     // any mem
 3882 %}
 3883 
 3884 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 3885 %{
          // Float operation with a memory operand and one register source
          // (three instructions): src1 and mem are both read in S4.
          // The "_con" suffix presumably refers to a constant operand that has
          // no entry here - TODO confirm against the instructions using this class.
 3886     instruction_count(3);
 3887     src1   : S4(read);
 3888     mem    : S4(read);
 3889     DECODE : S0;        // any decoder for FPU PUSH
 3890     D0     : S0(2);     // big decoder only
 3891     FPU    : S4;
 3892     MEM    : S3(2);     // any mem
 3893 %}
 3894 
 3895 // Float load constant (two instructions):
      // dst is written in S5; the constant itself is not a listed operand, but
      // the load occupies the big decoder and the memory unit.
 3896 pipe_class fpu_reg_con(regD dst)
 3897 %{
 3898     instruction_count(2);
 3899     dst    : S5(write);
 3900     D0     : S0;        // big decoder only for the load
 3901     DECODE : S1;        // any decoder for FPU POP
 3902     FPU    : S4;
 3903     MEM    : S3;        // any mem
 3904 %}
 3905 
 3906 // Float load constant combined with a register source (three instructions):
      // src is read in S3 and dst is written in S5.
 3907 pipe_class fpu_reg_reg_con(regD dst, regD src)
 3908 %{
 3909     instruction_count(3);
 3910     dst    : S5(write);
 3911     src    : S3(read);
 3912     D0     : S0;        // big decoder only for the load
 3913     DECODE : S1(2);     // any decoder for FPU POP
 3914     FPU    : S4;
 3915     MEM    : S3;        // any mem
 3916 %}
 3917 
 3918 // Unconditional branch:
      // a single instruction that uses only the BR resource, in S3. The label
      // operand has no read/write entry.
 3919 pipe_class pipe_jmp(label labl)
 3920 %{
 3921     single_instruction;
 3922     BR   : S3;
 3923 %}
 3924 
 3925 // Conditional branch:
      // a single instruction that reads the flags (cr) in S1 and uses BR in S3.
      // The cmp and labl operands have no read/write entries.
 3926 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 3927 %{
 3928     single_instruction;
 3929     cr    : S1(read);
 3930     BR    : S3;
 3931 %}
 3932 
 3933 // Allocation idiom (compare-and-exchange, going by the class name):
      // one instruction that forces serialization and has a fixed latency of 6.
      // heap_ptr is read in S3 and dst is written in S5.
 3934 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 3935 %{
 3936     instruction_count(1); force_serialization;
 3937     fixed_latency(6);
 3938     heap_ptr : S3(read);
 3939     DECODE   : S0(3);
 3940     D0       : S2;
 3941     MEM      : S3;
 3942     ALU      : S3(2);
 3943     dst      : S5(write);
 3944     BR       : S5;
 3945 %}
 3946 
 3947 // Generic big/slow expanded idiom:
      // modeled as 10 instructions spanning multiple bundles, forcing
      // serialization, with a fixed latency of 100. It takes no operands.
 3948 pipe_class pipe_slow()
 3949 %{
 3950     instruction_count(10); multiple_bundles; force_serialization;
 3951     fixed_latency(100);
 3952     D0  : S0(2);
 3953     MEM : S3(2);
 3954 %}
 3955 
 3956 // The real do-nothing guy:
      // a pipe class with an instruction count of zero and no operands or resources.
 3957 pipe_class empty()
 3958 %{
 3959     instruction_count(0);
 3960 %}
 3961 
 3962 // Define the class for the Nop node:
      // MachNop is bound to the zero-instruction 'empty' pipe class.
 3963 define
 3964 %{
 3965    MachNop = empty;
 3966 %}
 3967 
 3968 %}
 3969 
 3970 //----------INSTRUCTIONS-------------------------------------------------------
 3971 //
 3972 // match      -- States which machine-independent subtree may be replaced
 3973 //               by this instruction.
 3974 // ins_cost   -- The estimated cost of this instruction is used by instruction
 3975 //               selection to identify a minimum cost tree of machine
 3976 //               instructions that matches a tree of machine-independent
 3977 //               instructions.
 3978 // format     -- A string providing the disassembly for this instruction.
 3979 //               The value of an instruction's operand may be inserted
 3980 //               by referring to it with a '$' prefix.
 3981 // opcode     -- Three instruction opcodes may be provided.  These are referred
 3982 //               to within an encode class as $primary, $secondary, and $tertiary
 3983 //               respectively.  The primary opcode is commonly used to
 3984 //               indicate the type of machine instruction, while secondary
 3985 //               and tertiary are often used for prefix options or addressing
 3986 //               modes.
 3987 // ins_encode -- A list of encode classes with parameters. The encode class
 3988 //               name must have been defined in an 'enc_class' specification
 3989 //               in the encode section of the architecture description.
 3990 
 3991 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 3992 // Dummy float move, regF -> vlRegF.
      // The match rule is a bare register-to-register Set, and the encoding is
      // ShouldNotReachHere(), so this node must not survive to code emission.
 3993 instruct MoveF2VL(vlRegF dst, regF src) %{
 3994   match(Set dst src);
 3995   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 3996   ins_encode %{
 3997     ShouldNotReachHere();
 3998   %}
 3999   ins_pipe( fpu_reg_reg );
 4000 %}
 4001 
 4002 // Dummy float move, regF -> legRegF.
      // The match rule is a bare register-to-register Set, and the encoding is
      // ShouldNotReachHere(), so this node must not survive to code emission.
 4003 instruct MoveF2LEG(legRegF dst, regF src) %{
 4004   match(Set dst src);
 4005   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4006   ins_encode %{
 4007     ShouldNotReachHere();
 4008   %}
 4009   ins_pipe( fpu_reg_reg );
 4010 %}
 4011 
 4012 // Dummy float move, vlRegF -> regF.
      // The match rule is a bare register-to-register Set, and the encoding is
      // ShouldNotReachHere(), so this node must not survive to code emission.
 4013 instruct MoveVL2F(regF dst, vlRegF src) %{
 4014   match(Set dst src);
 4015   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4016   ins_encode %{
 4017     ShouldNotReachHere();
 4018   %}
 4019   ins_pipe( fpu_reg_reg );
 4020 %}
 4021 
 4022 // Dummy float move, legRegF -> regF.
      // The match rule is a bare register-to-register Set, and the encoding is
      // ShouldNotReachHere(), so this node must not survive to code emission.
 4023 instruct MoveLEG2F(regF dst, legRegF src) %{
 4024   match(Set dst src);
 4025   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4026   ins_encode %{
 4027     ShouldNotReachHere();
 4028   %}
 4029   ins_pipe( fpu_reg_reg );
 4030 %}
 4031 
 4032 // Dummy double move, regD -> vlRegD.
      // The match rule is a bare register-to-register Set, and the encoding is
      // ShouldNotReachHere(), so this node must not survive to code emission.
 4033 instruct MoveD2VL(vlRegD dst, regD src) %{
 4034   match(Set dst src);
 4035   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4036   ins_encode %{
 4037     ShouldNotReachHere();
 4038   %}
 4039   ins_pipe( fpu_reg_reg );
 4040 %}
 4041 
 4042 // Dummy double move, regD -> legRegD.
      // The match rule is a bare register-to-register Set, and the encoding is
      // ShouldNotReachHere(), so this node must not survive to code emission.
 4043 instruct MoveD2LEG(legRegD dst, regD src) %{
 4044   match(Set dst src);
 4045   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4046   ins_encode %{
 4047     ShouldNotReachHere();
 4048   %}
 4049   ins_pipe( fpu_reg_reg );
 4050 %}
 4051 
 4052 // Dummy double move, vlRegD -> regD.
      // The match rule is a bare register-to-register Set, and the encoding is
      // ShouldNotReachHere(), so this node must not survive to code emission.
 4053 instruct MoveVL2D(regD dst, vlRegD src) %{
 4054   match(Set dst src);
 4055   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4056   ins_encode %{
 4057     ShouldNotReachHere();
 4058   %}
 4059   ins_pipe( fpu_reg_reg );
 4060 %}
 4061 
 4062 // Dummy double move, legRegD -> regD.
      // The match rule is a bare register-to-register Set, and the encoding is
      // ShouldNotReachHere(), so this node must not survive to code emission.
 4063 instruct MoveLEG2D(regD dst, legRegD src) %{
 4064   match(Set dst src);
 4065   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4066   ins_encode %{
 4067     ShouldNotReachHere();
 4068   %}
 4069   ins_pipe( fpu_reg_reg );
 4070 %}
 4071 
 4072 //----------Load/Store/Move Instructions---------------------------------------
 4073 //----------Load Instructions--------------------------------------------------
 4074 
 4075 // Load Byte (8 bit signed)
      // Matches LoadB and emits movsbl, sign-extending the byte into a 32-bit register.
 4076 instruct loadB(rRegI dst, memory mem)
 4077 %{
 4078   match(Set dst (LoadB mem));
 4079 
 4080   ins_cost(125);
 4081   format %{ "movsbl  $dst, $mem\t# byte" %}
 4082 
 4083   ins_encode %{
 4084     __ movsbl($dst$$Register, $mem$$Address);
 4085   %}
 4086 
 4087   ins_pipe(ialu_reg_mem);
 4088 %}
 4089 
 4090 // Load Byte (8 bit signed) into Long Register
      // Folds ConvI2L(LoadB) into one movsbq, sign-extending the byte to 64 bits.
 4091 instruct loadB2L(rRegL dst, memory mem)
 4092 %{
 4093   match(Set dst (ConvI2L (LoadB mem)));
 4094 
 4095   ins_cost(125);
 4096   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 4097 
 4098   ins_encode %{
 4099     __ movsbq($dst$$Register, $mem$$Address);
 4100   %}
 4101 
 4102   ins_pipe(ialu_reg_mem);
 4103 %}
 4104 
 4105 // Load Unsigned Byte (8 bit UNsigned)
      // Matches LoadUB and emits movzbl, zero-extending the byte into a 32-bit register.
 4106 instruct loadUB(rRegI dst, memory mem)
 4107 %{
 4108   match(Set dst (LoadUB mem));
 4109 
 4110   ins_cost(125);
 4111   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 4112 
 4113   ins_encode %{
 4114     __ movzbl($dst$$Register, $mem$$Address);
 4115   %}
 4116 
 4117   ins_pipe(ialu_reg_mem);
 4118 %}
 4119 
 4120 // Load Unsigned Byte (8 bit UNsigned) into Long Register
      // Folds ConvI2L(LoadUB) into one movzbq, zero-extending the byte to 64 bits.
 4121 instruct loadUB2L(rRegL dst, memory mem)
 4122 %{
 4123   match(Set dst (ConvI2L (LoadUB mem)));
 4124 
 4125   ins_cost(125);
 4126   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 4127 
 4128   ins_encode %{
 4129     __ movzbq($dst$$Register, $mem$$Address);
 4130   %}
 4131 
 4132   ins_pipe(ialu_reg_mem);
 4133 %}
 4134 
 4135 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
      // Folds ConvI2L(AndI(LoadUB, mask)) into movzbq followed by andl. Only the
      // low 8 bits of the mask are applied, since the loaded value has no higher bits.
 4136 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 4137   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 4138   effect(KILL cr); // the andl below modifies the flags
 4139 
 4140   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 4141             "andl    $dst, right_n_bits($mask, 8)" %}
 4142   ins_encode %{
 4143     Register Rdst = $dst$$Register;
 4144     __ movzbq(Rdst, $mem$$Address);
 4145     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 4146   %}
 4147   ins_pipe(ialu_reg_mem);
 4148 %}
 4149 
 4150 // Load Short (16 bit signed)
      // Matches LoadS and emits movswl, sign-extending the 16-bit value into a 32-bit register.
 4151 instruct loadS(rRegI dst, memory mem)
 4152 %{
 4153   match(Set dst (LoadS mem));
 4154 
 4155   ins_cost(125);
 4156   format %{ "movswl $dst, $mem\t# short" %}
 4157 
 4158   ins_encode %{
 4159     __ movswl($dst$$Register, $mem$$Address);
 4160   %}
 4161 
 4162   ins_pipe(ialu_reg_mem);
 4163 %}
 4164 
 4165 // Load Short (16 bit signed) to Byte (8 bit signed)
      // Matches (LoadS << 24) >> 24, which keeps only the sign-extended low byte,
      // and replaces it with a single movsbl from the same address.
 4166 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 4167   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 4168 
 4169   ins_cost(125);
 4170   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 4171   ins_encode %{
 4172     __ movsbl($dst$$Register, $mem$$Address);
 4173   %}
 4174   ins_pipe(ialu_reg_mem);
 4175 %}
 4176 
 4177 // Load Short (16 bit signed) into Long Register
      // Folds ConvI2L(LoadS) into one movswq, sign-extending the 16-bit value to 64 bits.
 4178 instruct loadS2L(rRegL dst, memory mem)
 4179 %{
 4180   match(Set dst (ConvI2L (LoadS mem)));
 4181 
 4182   ins_cost(125);
 4183   format %{ "movswq $dst, $mem\t# short -> long" %}
 4184 
 4185   ins_encode %{
 4186     __ movswq($dst$$Register, $mem$$Address);
 4187   %}
 4188 
 4189   ins_pipe(ialu_reg_mem);
 4190 %}
 4191 
 4192 // Load Unsigned Short/Char (16 bit UNsigned)
      // Matches LoadUS and emits movzwl, zero-extending the 16-bit value into a 32-bit register.
 4193 instruct loadUS(rRegI dst, memory mem)
 4194 %{
 4195   match(Set dst (LoadUS mem));
 4196 
 4197   ins_cost(125);
 4198   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 4199 
 4200   ins_encode %{
 4201     __ movzwl($dst$$Register, $mem$$Address);
 4202   %}
 4203 
 4204   ins_pipe(ialu_reg_mem);
 4205 %}
 4206 
 4207 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
      // Matches (LoadUS << 24) >> 24, which keeps only the sign-extended low byte,
      // and replaces it with a single movsbl from the same address.
 4208 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 4209   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 4210 
 4211   ins_cost(125);
 4212   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 4213   ins_encode %{
 4214     __ movsbl($dst$$Register, $mem$$Address);
 4215   %}
 4216   ins_pipe(ialu_reg_mem);
 4217 %}
 4218 
 4219 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
      // Folds ConvI2L(LoadUS) into one movzwq, zero-extending the 16-bit value to 64 bits.
 4220 instruct loadUS2L(rRegL dst, memory mem)
 4221 %{
 4222   match(Set dst (ConvI2L (LoadUS mem)));
 4223 
 4224   ins_cost(125);
 4225   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 4226 
 4227   ins_encode %{
 4228     __ movzwq($dst$$Register, $mem$$Address);
 4229   %}
 4230 
 4231   ins_pipe(ialu_reg_mem);
 4232 %}
 4233 
 4234 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
      // Masking with 0xFF leaves only the low byte, so the load, mask and
      // conversion collapse into one zero-extending byte load (movzbq).
      // No ins_cost is given for this rule.
 4235 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 4236   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 4237 
 4238   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 4239   ins_encode %{
 4240     __ movzbq($dst$$Register, $mem$$Address);
 4241   %}
 4242   ins_pipe(ialu_reg_mem);
 4243 %}
 4244 
 4245 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
      // Folds ConvI2L(AndI(LoadUS, mask)) into movzwq followed by andl. Only the
      // low 16 bits of the mask are applied, since the loaded value has no higher bits.
 4246 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 4247   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 4248   effect(KILL cr); // the andl below modifies the flags
 4249 
 4250   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 4251             "andl    $dst, right_n_bits($mask, 16)" %}
 4252   ins_encode %{
 4253     Register Rdst = $dst$$Register;
 4254     __ movzwq(Rdst, $mem$$Address);
 4255     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 4256   %}
 4257   ins_pipe(ialu_reg_mem);
 4258 %}
 4259 
 4260 // Load Integer
      // Matches LoadI and emits a plain 32-bit movl from memory.
 4261 instruct loadI(rRegI dst, memory mem)
 4262 %{
 4263   match(Set dst (LoadI mem));
 4264 
 4265   ins_cost(125);
 4266   format %{ "movl    $dst, $mem\t# int" %}
 4267 
 4268   ins_encode %{
 4269     __ movl($dst$$Register, $mem$$Address);
 4270   %}
 4271 
 4272   ins_pipe(ialu_reg_mem);
 4273 %}
 4274 
 4275 // Load Integer (32 bit signed) to Byte (8 bit signed)
      // Matches (LoadI << 24) >> 24, which keeps only the sign-extended low byte,
      // and replaces it with a single movsbl from the same address.
 4276 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 4277   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 4278 
 4279   ins_cost(125);
 4280   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 4281   ins_encode %{
 4282     __ movsbl($dst$$Register, $mem$$Address);
 4283   %}
 4284   ins_pipe(ialu_reg_mem);
 4285 %}
 4286 
 4287 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
      // Matches LoadI & 0xFF and replaces it with a single zero-extending byte load (movzbl).
 4288 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 4289   match(Set dst (AndI (LoadI mem) mask));
 4290 
 4291   ins_cost(125);
 4292   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 4293   ins_encode %{
 4294     __ movzbl($dst$$Register, $mem$$Address);
 4295   %}
 4296   ins_pipe(ialu_reg_mem);
 4297 %}
 4298 
 4299 // Load Integer (32 bit signed) to Short (16 bit signed)
      // Matches (LoadI << 16) >> 16, which keeps only the sign-extended low 16 bits,
      // and replaces it with a single movswl from the same address.
 4300 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 4301   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 4302 
 4303   ins_cost(125);
 4304   format %{ "movswl  $dst, $mem\t# int -> short" %}
 4305   ins_encode %{
 4306     __ movswl($dst$$Register, $mem$$Address);
 4307   %}
 4308   ins_pipe(ialu_reg_mem);
 4309 %}
 4310 
 4311 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
      // Matches LoadI & 0xFFFF and replaces it with a single zero-extending 16-bit load (movzwl).
 4312 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 4313   match(Set dst (AndI (LoadI mem) mask));
 4314 
 4315   ins_cost(125);
 4316   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 4317   ins_encode %{
 4318     __ movzwl($dst$$Register, $mem$$Address);
 4319   %}
 4320   ins_pipe(ialu_reg_mem);
 4321 %}
 4322 
 4323 // Load Integer into Long Register
      // Folds ConvI2L(LoadI) into one movslq, sign-extending the 32-bit value to 64 bits.
 4324 instruct loadI2L(rRegL dst, memory mem)
 4325 %{
 4326   match(Set dst (ConvI2L (LoadI mem)));
 4327 
 4328   ins_cost(125);
 4329   format %{ "movslq  $dst, $mem\t# int -> long" %}
 4330 
 4331   ins_encode %{
 4332     __ movslq($dst$$Register, $mem$$Address);
 4333   %}
 4334 
 4335   ins_pipe(ialu_reg_mem);
 4336 %}
 4337 
 4338 // Load Integer with mask 0xFF into Long Register
      // Masking with 0xFF leaves only the low byte, so the load, mask and
      // conversion collapse into one zero-extending byte load (movzbq).
      // No ins_cost is given for this rule.
 4339 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 4340   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 4341 
 4342   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 4343   ins_encode %{
 4344     __ movzbq($dst$$Register, $mem$$Address);
 4345   %}
 4346   ins_pipe(ialu_reg_mem);
 4347 %}
 4348 
 4349 // Load Integer with mask 0xFFFF into Long Register
      // Masking with 0xFFFF leaves only the low 16 bits, so the load, mask and
      // conversion collapse into one zero-extending 16-bit load (movzwq).
      // No ins_cost is given for this rule.
 4350 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 4351   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 4352 
 4353   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 4354   ins_encode %{
 4355     __ movzwq($dst$$Register, $mem$$Address);
 4356   %}
 4357   ins_pipe(ialu_reg_mem);
 4358 %}
 4359 
 4360 // Load Integer with a 31-bit mask into Long Register
      // Folds ConvI2L(AndI(LoadI, mask)) into movl followed by andl. With a 31-bit
      // mask the masked value is non-negative, so no sign extension is needed.
 4361 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 4362   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 4363   effect(KILL cr); // the andl below modifies the flags
 4364 
 4365   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 4366             "andl    $dst, $mask" %}
 4367   ins_encode %{
 4368     Register Rdst = $dst$$Register;
 4369     __ movl(Rdst, $mem$$Address);
 4370     __ andl(Rdst, $mask$$constant);
 4371   %}
 4372   ins_pipe(ialu_reg_mem);
 4373 %}
 4374 
 4375 // Load Unsigned Integer into Long Register
      // Matches ConvI2L(LoadI) masked with the 32-bit mask operand, i.e. the int
      // treated as unsigned. A single movl suffices because a 32-bit register
      // write on x86-64 clears the upper half of the 64-bit register.
 4376 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 4377 %{
 4378   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 4379 
 4380   ins_cost(125);
 4381   format %{ "movl    $dst, $mem\t# uint -> long" %}
 4382 
 4383   ins_encode %{
 4384     __ movl($dst$$Register, $mem$$Address);
 4385   %}
 4386 
 4387   ins_pipe(ialu_reg_mem);
 4388 %}
 4389 
 4390 // Load Long
      // Matches LoadL and emits a 64-bit movq from memory.
 4391 instruct loadL(rRegL dst, memory mem)
 4392 %{
 4393   match(Set dst (LoadL mem));
 4394 
 4395   ins_cost(125);
 4396   format %{ "movq    $dst, $mem\t# long" %}
 4397 
 4398   ins_encode %{
 4399     __ movq($dst$$Register, $mem$$Address);
 4400   %}
 4401 
 4402   ins_pipe(ialu_reg_mem); // XXX
 4403 %}
 4404 
 4405 // Load Range
      // Matches LoadRange and emits a 32-bit movl into an int register.
 4406 instruct loadRange(rRegI dst, memory mem)
 4407 %{
 4408   match(Set dst (LoadRange mem));
 4409 
 4410   ins_cost(125); // XXX
 4411   format %{ "movl    $dst, $mem\t# range" %}
 4412   ins_encode %{
 4413     __ movl($dst$$Register, $mem$$Address);
 4414   %}
 4415   ins_pipe(ialu_reg_mem);
 4416 %}
 4417 
 4418 // Load Pointer
      // Matches LoadP and emits a 64-bit movq, but only for loads whose
      // barrier_data() is zero.
 4419 instruct loadP(rRegP dst, memory mem)
 4420 %{
 4421   match(Set dst (LoadP mem));
 4422   predicate(n->as_Load()->barrier_data() == 0);
 4423 
 4424   ins_cost(125); // XXX
 4425   format %{ "movq    $dst, $mem\t# ptr" %}
 4426   ins_encode %{
 4427     __ movq($dst$$Register, $mem$$Address);
 4428   %}
 4429   ins_pipe(ialu_reg_mem); // XXX
 4430 %}
 4431 
 4432 // Load Compressed Pointer
      // Matches LoadN and emits a 32-bit movl into a narrow-pointer register (rRegN).
 4433 instruct loadN(rRegN dst, memory mem)
 4434 %{
 4435    match(Set dst (LoadN mem));
 4436 
 4437    ins_cost(125); // XXX
 4438    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 4439    ins_encode %{
 4440      __ movl($dst$$Register, $mem$$Address);
 4441    %}
 4442    ins_pipe(ialu_reg_mem); // XXX
 4443 %}
 4444 
 4445 
 4446 // Load Klass Pointer
      // Matches LoadKlass and emits a 64-bit movq into a pointer register.
 4447 instruct loadKlass(rRegP dst, memory mem)
 4448 %{
 4449   match(Set dst (LoadKlass mem));
 4450 
 4451   ins_cost(125); // XXX
 4452   format %{ "movq    $dst, $mem\t# class" %}
 4453   ins_encode %{
 4454     __ movq($dst$$Register, $mem$$Address);
 4455   %}
 4456   ins_pipe(ialu_reg_mem); // XXX
 4457 %}
 4458 
 4459 // Load narrow Klass Pointer
      // Matches LoadNKlass and emits a 32-bit movl into a narrow register (rRegN).
 4460 instruct loadNKlass(rRegN dst, memory mem)
 4461 %{
 4462   match(Set dst (LoadNKlass mem));
 4463 
 4464   ins_cost(125); // XXX
 4465   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 4466   ins_encode %{
 4467     __ movl($dst$$Register, $mem$$Address);
 4468   %}
 4469   ins_pipe(ialu_reg_mem); // XXX
 4470 %}
 4471 
 4472 // Load Float
      // Matches LoadF and loads into an XMM register through the movflt()
      // assembler helper (printed as movss). Costed at 145, above the integer loads.
 4473 instruct loadF(regF dst, memory mem)
 4474 %{
 4475   match(Set dst (LoadF mem));
 4476 
 4477   ins_cost(145); // XXX
 4478   format %{ "movss   $dst, $mem\t# float" %}
 4479   ins_encode %{
 4480     __ movflt($dst$$XMMRegister, $mem$$Address);
 4481   %}
 4482   ins_pipe(pipe_slow); // XXX
 4483 %}
 4484 
 4485 // Load Double
      // Variant selected when UseXmmLoadAndClearUpper is false. The format prints
      // movlpd while the encoding calls the movdbl() helper; presumably movdbl()
      // picks the instruction from the same flag - TODO confirm in the macro assembler.
 4486 instruct loadD_partial(regD dst, memory mem)
 4487 %{
 4488   predicate(!UseXmmLoadAndClearUpper);
 4489   match(Set dst (LoadD mem));
 4490 
 4491   ins_cost(145); // XXX
 4492   format %{ "movlpd  $dst, $mem\t# double" %}
 4493   ins_encode %{
 4494     __ movdbl($dst$$XMMRegister, $mem$$Address);
 4495   %}
 4496   ins_pipe(pipe_slow); // XXX
 4497 %}
 4498 
 4499 instruct loadD(regD dst, memory mem)
 4500 %{
        // Load Double, variant selected when UseXmmLoadAndClearUpper is true.
        // Same match rule, cost and movdbl() encoding as loadD_partial; only the
        // predicate and the printed mnemonic (movsd) differ.
 4501   predicate(UseXmmLoadAndClearUpper);
 4502   match(Set dst (LoadD mem));
 4503 
 4504   ins_cost(145); // XXX
 4505   format %{ "movsd   $dst, $mem\t# double" %}
 4506   ins_encode %{
 4507     __ movdbl($dst$$XMMRegister, $mem$$Address);
 4508   %}
 4509   ins_pipe(pipe_slow); // XXX
 4510 %}
 4511 
 4512 
 4513 // The following pseudocode describes the algorithm for max[FD].
 4514 // The min algorithm follows similar lines.
 4515 //  btmp = (b < +0.0) ? a : b
 4516 //  atmp = (b < +0.0) ? b : a
 4517 //  Tmp  = Max_Float(atmp , btmp)
 4518 //  Res  = (atmp == NaN) ? atmp : Tmp
 4519 
 4520 // max = java.lang.Math.max(float a, float b)
      // AVX form for nodes that are not SuperWord reductions. Uses three XMM
      // temporaries and a five-instruction blend / max / NaN-fixup sequence at
      // 128-bit vector length.
 4521 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 4522   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 4523   match(Set dst (MaxF a b));
 4524   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 4525   format %{
 4526      "vblendvps        $btmp,$b,$a,$b           \n\t"
 4527      "vblendvps        $atmp,$a,$b,$b           \n\t"
 4528      "vmaxss           $tmp,$atmp,$btmp         \n\t"
 4529      "vcmpps.unordered $btmp,$atmp,$atmp        \n\t"
 4530      "vblendvps        $dst,$tmp,$atmp,$btmp    \n\t"
 4531   %}
 4532   ins_encode %{
 4533     int vector_len = Assembler::AVX_128bit;
          // btmp = (sign bit of b set) ? a : b
 4534     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
          // atmp = (sign bit of b set) ? b : a
 4535     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
          // tmp = max(atmp, btmp)
 4536     __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
          // btmp is reused as the NaN mask: atmp compared with itself (printed as "unordered")
 4537     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
          // dst = (atmp is NaN) ? atmp : tmp
 4538     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 4539  %}
 4540   ins_pipe( pipe_slow );
 4541 %}
 4542 
 4543 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
        // Float max for nodes that are SuperWord reductions (AVX only).
        // Delegates to the emit_fp_min_max() helper with min=false, single=true;
        // needs one XMM temp and one integer temp, and kills the flags.
 4544   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 4545   match(Set dst (MaxF a b));
 4546   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 4547 
 4548   format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
 4549   ins_encode %{
 4550     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 4551                     false /*min*/, true /*single*/);
 4552   %}
 4553   ins_pipe( pipe_slow );
 4554 %}
 4555 
 4556 // max = java.lang.Math.max(double a, double b)
      // AVX form for nodes that are not SuperWord reductions. Uses three XMM
      // temporaries and a five-instruction blend / max / NaN-fixup sequence at
      // 128-bit vector length.
 4557 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 4558   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 4559   match(Set dst (MaxD a b));
 4560   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 4561   format %{
 4562      "vblendvpd        $btmp,$b,$a,$b            \n\t"
 4563      "vblendvpd        $atmp,$a,$b,$b            \n\t"
 4564      "vmaxsd           $tmp,$atmp,$btmp          \n\t"
 4565      "vcmppd.unordered $btmp,$atmp,$atmp         \n\t"
 4566      "vblendvpd        $dst,$tmp,$atmp,$btmp     \n\t"
 4567   %}
 4568   ins_encode %{
 4569     int vector_len = Assembler::AVX_128bit;
          // btmp = (sign bit of b set) ? a : b
 4570     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
          // atmp = (sign bit of b set) ? b : a
 4571     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
          // tmp = max(atmp, btmp)
 4572     __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
          // btmp is reused as the NaN mask: atmp compared with itself (printed as "unordered")
 4573     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
          // dst = (atmp is NaN) ? atmp : tmp
 4574     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 4575   %}
 4576   ins_pipe( pipe_slow );
 4577 %}
 4578 
 4579 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
        // Double max for nodes that are SuperWord reductions (AVX only).
        // Delegates to the emit_fp_min_max() helper with min=false, single=false;
        // needs one XMM temp and one long integer temp, and kills the flags.
 4580   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 4581   match(Set dst (MaxD a b));
 4582   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 4583 
 4584   format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
 4585   ins_encode %{
 4586     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 4587                     false /*min*/, false /*single*/);
 4588   %}
 4589   ins_pipe( pipe_slow );
 4590 %}
 4591 
 4592 // min = java.lang.Math.min(float a, float b)
      // AVX form for nodes that are not SuperWord reductions. Mirrors maxF_reg,
      // except that the blend selector is taken from a instead of b and vminss
      // replaces vmaxss.
 4593 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 4594   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 4595   match(Set dst (MinF a b));
 4596   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 4597   format %{
 4598      "vblendvps        $atmp,$a,$b,$a             \n\t"
 4599      "vblendvps        $btmp,$b,$a,$a             \n\t"
 4600      "vminss           $tmp,$atmp,$btmp           \n\t"
 4601      "vcmpps.unordered $btmp,$atmp,$atmp          \n\t"
 4602      "vblendvps        $dst,$tmp,$atmp,$btmp      \n\t"
 4603   %}
 4604   ins_encode %{
 4605     int vector_len = Assembler::AVX_128bit;
          // atmp = (sign bit of a set) ? b : a
 4606     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
          // btmp = (sign bit of a set) ? a : b
 4607     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
          // tmp = min(atmp, btmp)
 4608     __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
          // btmp is reused as the NaN mask: atmp compared with itself (printed as "unordered")
 4609     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
          // dst = (atmp is NaN) ? atmp : tmp
 4610     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 4611   %}
 4612   ins_pipe( pipe_slow );
 4613 %}
 4614 
 4615 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
        // Float min for nodes that are SuperWord reductions (AVX only).
        // Delegates to the emit_fp_min_max() helper with min=true, single=true;
        // needs one XMM temp and one integer temp, and kills the flags.
 4616   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 4617   match(Set dst (MinF a b));
 4618   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 4619 
 4620   format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
 4621   ins_encode %{
 4622     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 4623                     true /*min*/, true /*single*/);
 4624   %}
 4625   ins_pipe( pipe_slow );
 4626 %}
 4627 
 4628 // min = java.lang.Math.min(double a, double b)
      // AVX form for nodes that are not SuperWord reductions. Mirrors maxD_reg,
      // except that the blend selector is taken from a instead of b and vminsd
      // replaces vmaxsd.
 4629 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 4630   predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
 4631   match(Set dst (MinD a b));
 4632   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 4633   format %{
 4634      "vblendvpd        $atmp,$a,$b,$a           \n\t"
 4635      "vblendvpd        $btmp,$b,$a,$a           \n\t"
 4636      "vminsd           $tmp,$atmp,$btmp         \n\t"
 4637      "vcmppd.unordered $btmp,$atmp,$atmp        \n\t"
 4638      "vblendvpd        $dst,$tmp,$atmp,$btmp    \n\t"
 4639   %}
 4640   ins_encode %{
 4641     int vector_len = Assembler::AVX_128bit;
          // atmp = (sign bit of a set) ? b : a
 4642     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
          // btmp = (sign bit of a set) ? a : b
 4643     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
          // tmp = min(atmp, btmp)
 4644     __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
          // btmp is reused as the NaN mask: atmp compared with itself (printed as "unordered")
 4645     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
          // dst = (atmp is NaN) ? atmp : tmp
 4646     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
 4647   %}
 4648   ins_pipe( pipe_slow );
 4649 %}
 4650 
 4651 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
        // Double min for nodes that are SuperWord reductions (AVX only).
        // Delegates to the emit_fp_min_max() helper with min=true, single=false;
        // needs one XMM temp and one long integer temp, and kills the flags.
 4652   predicate(UseAVX > 0 && SuperWord::is_reduction(n));
 4653   match(Set dst (MinD a b));
 4654   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 4655 
 4656   format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
 4657   ins_encode %{
 4658     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 4659                     true /*min*/, false /*single*/);
 4660   %}
 4661   ins_pipe( pipe_slow );
 4662 %}
 4663 
 4664 // Load Effective Address
      // Matches an indOffset8 address operand used directly as a value and
      // computes that address into dst with leaq; no memory is accessed.
 4665 instruct leaP8(rRegP dst, indOffset8 mem)
 4666 %{
 4667   match(Set dst mem);
 4668 
 4669   ins_cost(110); // XXX
 4670   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 4671   ins_encode %{
 4672     __ leaq($dst$$Register, $mem$$Address);
 4673   %}
 4674   ins_pipe(ialu_reg_reg_fat);
 4675 %}
 4676 
 4677 instruct leaP32(rRegP dst, indOffset32 mem)
 4678 %{
        // Load Effective Address for the indOffset32 addressing form:
        // computes the operand's address into dst with leaq; no memory is accessed.
 4679   match(Set dst mem);
 4680 
 4681   ins_cost(110);
 4682   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 4683   ins_encode %{
 4684     __ leaq($dst$$Register, $mem$$Address);
 4685   %}
 4686   ins_pipe(ialu_reg_reg_fat);
 4687 %}
 4688 
 4689 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 4690 %{
 4691   match(Set dst mem);
 4692 
 4693   ins_cost(110);
 4694   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 4695   ins_encode %{
 4696     __ leaq($dst$$Register, $mem$$Address);
 4697   %}
 4698   ins_pipe(ialu_reg_reg_fat);
 4699 %}
 4700 
 4701 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 4702 %{
 4703   match(Set dst mem);
 4704 
 4705   ins_cost(110);
 4706   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 4707   ins_encode %{
 4708     __ leaq($dst$$Register, $mem$$Address);
 4709   %}
 4710   ins_pipe(ialu_reg_reg_fat);
 4711 %}
 4712 
 4713 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 4714 %{
 4715   match(Set dst mem);
 4716 
 4717   ins_cost(110);
 4718   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 4719   ins_encode %{
 4720     __ leaq($dst$$Register, $mem$$Address);
 4721   %}
 4722   ins_pipe(ialu_reg_reg_fat);
 4723 %}
 4724 
 4725 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 4726 %{
 4727   match(Set dst mem);
 4728 
 4729   ins_cost(110);
 4730   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 4731   ins_encode %{
 4732     __ leaq($dst$$Register, $mem$$Address);
 4733   %}
 4734   ins_pipe(ialu_reg_reg_fat);
 4735 %}
 4736 
 4737 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 4738 %{
 4739   match(Set dst mem);
 4740 
 4741   ins_cost(110);
 4742   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 4743   ins_encode %{
 4744     __ leaq($dst$$Register, $mem$$Address);
 4745   %}
 4746   ins_pipe(ialu_reg_reg_fat);
 4747 %}
 4748 
 4749 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 4750 %{
 4751   match(Set dst mem);
 4752 
 4753   ins_cost(110);
 4754   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 4755   ins_encode %{
 4756     __ leaq($dst$$Register, $mem$$Address);
 4757   %}
 4758   ins_pipe(ialu_reg_reg_fat);
 4759 %}
 4760 
 4761 // Load Effective Address which uses Narrow (32-bits) oop
 4762 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 4763 %{
 4764   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 4765   match(Set dst mem);
 4766 
 4767   ins_cost(110);
 4768   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 4769   ins_encode %{
 4770     __ leaq($dst$$Register, $mem$$Address);
 4771   %}
 4772   ins_pipe(ialu_reg_reg_fat);
 4773 %}
 4774 
 4775 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 4776 %{
 4777   predicate(CompressedOops::shift() == 0);
 4778   match(Set dst mem);
 4779 
 4780   ins_cost(110); // XXX
 4781   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 4782   ins_encode %{
 4783     __ leaq($dst$$Register, $mem$$Address);
 4784   %}
 4785   ins_pipe(ialu_reg_reg_fat);
 4786 %}
 4787 
 4788 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 4789 %{
 4790   predicate(CompressedOops::shift() == 0);
 4791   match(Set dst mem);
 4792 
 4793   ins_cost(110);
 4794   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 4795   ins_encode %{
 4796     __ leaq($dst$$Register, $mem$$Address);
 4797   %}
 4798   ins_pipe(ialu_reg_reg_fat);
 4799 %}
 4800 
 4801 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 4802 %{
 4803   predicate(CompressedOops::shift() == 0);
 4804   match(Set dst mem);
 4805 
 4806   ins_cost(110);
 4807   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 4808   ins_encode %{
 4809     __ leaq($dst$$Register, $mem$$Address);
 4810   %}
 4811   ins_pipe(ialu_reg_reg_fat);
 4812 %}
 4813 
 4814 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 4815 %{
 4816   predicate(CompressedOops::shift() == 0);
 4817   match(Set dst mem);
 4818 
 4819   ins_cost(110);
 4820   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 4821   ins_encode %{
 4822     __ leaq($dst$$Register, $mem$$Address);
 4823   %}
 4824   ins_pipe(ialu_reg_reg_fat);
 4825 %}
 4826 
 4827 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 4828 %{
 4829   predicate(CompressedOops::shift() == 0);
 4830   match(Set dst mem);
 4831 
 4832   ins_cost(110);
 4833   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 4834   ins_encode %{
 4835     __ leaq($dst$$Register, $mem$$Address);
 4836   %}
 4837   ins_pipe(ialu_reg_reg_fat);
 4838 %}
 4839 
 4840 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 4841 %{
 4842   predicate(CompressedOops::shift() == 0);
 4843   match(Set dst mem);
 4844 
 4845   ins_cost(110);
 4846   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 4847   ins_encode %{
 4848     __ leaq($dst$$Register, $mem$$Address);
 4849   %}
 4850   ins_pipe(ialu_reg_reg_fat);
 4851 %}
 4852 
 4853 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 4854 %{
 4855   predicate(CompressedOops::shift() == 0);
 4856   match(Set dst mem);
 4857 
 4858   ins_cost(110);
 4859   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 4860   ins_encode %{
 4861     __ leaq($dst$$Register, $mem$$Address);
 4862   %}
 4863   ins_pipe(ialu_reg_reg_fat);
 4864 %}
 4865 
 4866 instruct loadConI(rRegI dst, immI src)
 4867 %{
 4868   match(Set dst src);
 4869 
 4870   format %{ "movl    $dst, $src\t# int" %}
 4871   ins_encode %{
 4872     __ movl($dst$$Register, $src$$constant);
 4873   %}
 4874   ins_pipe(ialu_reg_fat); // XXX
 4875 %}
 4876 
 4877 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 4878 %{
 4879   match(Set dst src);
 4880   effect(KILL cr);
 4881 
 4882   ins_cost(50);
 4883   format %{ "xorl    $dst, $dst\t# int" %}
 4884   ins_encode %{
 4885     __ xorl($dst$$Register, $dst$$Register);
 4886   %}
 4887   ins_pipe(ialu_reg);
 4888 %}
 4889 
 4890 instruct loadConL(rRegL dst, immL src)
 4891 %{
 4892   match(Set dst src);
 4893 
 4894   ins_cost(150);
 4895   format %{ "movq    $dst, $src\t# long" %}
 4896   ins_encode %{
 4897     __ mov64($dst$$Register, $src$$constant);
 4898   %}
 4899   ins_pipe(ialu_reg);
 4900 %}
 4901 
 4902 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 4903 %{
 4904   match(Set dst src);
 4905   effect(KILL cr);
 4906 
 4907   ins_cost(50);
 4908   format %{ "xorl    $dst, $dst\t# long" %}
 4909   ins_encode %{
 4910     __ xorl($dst$$Register, $dst$$Register);
 4911   %}
 4912   ins_pipe(ialu_reg); // XXX
 4913 %}
 4914 
 4915 instruct loadConUL32(rRegL dst, immUL32 src)
 4916 %{
 4917   match(Set dst src);
 4918 
 4919   ins_cost(60);
 4920   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 4921   ins_encode %{
 4922     __ movl($dst$$Register, $src$$constant);
 4923   %}
 4924   ins_pipe(ialu_reg);
 4925 %}
 4926 
 4927 instruct loadConL32(rRegL dst, immL32 src)
 4928 %{
 4929   match(Set dst src);
 4930 
 4931   ins_cost(70);
 4932   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 4933   ins_encode %{
 4934     __ movq($dst$$Register, $src$$constant);
 4935   %}
 4936   ins_pipe(ialu_reg);
 4937 %}
 4938 
 4939 instruct loadConP(rRegP dst, immP con) %{
 4940   match(Set dst con);
 4941 
 4942   format %{ "movq    $dst, $con\t# ptr" %}
 4943   ins_encode %{
 4944     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 4945   %}
 4946   ins_pipe(ialu_reg_fat); // XXX
 4947 %}
 4948 
 4949 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 4950 %{
 4951   match(Set dst src);
 4952   effect(KILL cr);
 4953 
 4954   ins_cost(50);
 4955   format %{ "xorl    $dst, $dst\t# ptr" %}
 4956   ins_encode %{
 4957     __ xorl($dst$$Register, $dst$$Register);
 4958   %}
 4959   ins_pipe(ialu_reg);
 4960 %}
 4961 
 4962 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 4963 %{
 4964   match(Set dst src);
 4965   effect(KILL cr);
 4966 
 4967   ins_cost(60);
 4968   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 4969   ins_encode %{
 4970     __ movl($dst$$Register, $src$$constant);
 4971   %}
 4972   ins_pipe(ialu_reg);
 4973 %}
 4974 
 4975 instruct loadConF(regF dst, immF con) %{
 4976   match(Set dst con);
 4977   ins_cost(125);
 4978   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 4979   ins_encode %{
 4980     __ movflt($dst$$XMMRegister, $constantaddress($con));
 4981   %}
 4982   ins_pipe(pipe_slow);
 4983 %}
 4984 
 4985 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 4986   match(Set dst src);
 4987   effect(KILL cr);
 4988   format %{ "xorq    $dst, $src\t# compressed nullptr ptr" %}
 4989   ins_encode %{
 4990     __ xorq($dst$$Register, $dst$$Register);
 4991   %}
 4992   ins_pipe(ialu_reg);
 4993 %}
 4994 
 4995 instruct loadConN(rRegN dst, immN src) %{
 4996   match(Set dst src);
 4997 
 4998   ins_cost(125);
 4999   format %{ "movl    $dst, $src\t# compressed ptr" %}
 5000   ins_encode %{
 5001     address con = (address)$src$$constant;
 5002     if (con == nullptr) {
 5003       ShouldNotReachHere();
 5004     } else {
 5005       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 5006     }
 5007   %}
 5008   ins_pipe(ialu_reg_fat); // XXX
 5009 %}
 5010 
 5011 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 5012   match(Set dst src);
 5013 
 5014   ins_cost(125);
 5015   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 5016   ins_encode %{
 5017     address con = (address)$src$$constant;
 5018     if (con == nullptr) {
 5019       ShouldNotReachHere();
 5020     } else {
 5021       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 5022     }
 5023   %}
 5024   ins_pipe(ialu_reg_fat); // XXX
 5025 %}
 5026 
 5027 instruct loadConF0(regF dst, immF0 src)
 5028 %{
 5029   match(Set dst src);
 5030   ins_cost(100);
 5031 
 5032   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 5033   ins_encode %{
 5034     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 5035   %}
 5036   ins_pipe(pipe_slow);
 5037 %}
 5038 
 5039 // Use the same format since predicate() can not be used here.
 5040 instruct loadConD(regD dst, immD con) %{
 5041   match(Set dst con);
 5042   ins_cost(125);
 5043   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 5044   ins_encode %{
 5045     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 5046   %}
 5047   ins_pipe(pipe_slow);
 5048 %}
 5049 
 5050 instruct loadConD0(regD dst, immD0 src)
 5051 %{
 5052   match(Set dst src);
 5053   ins_cost(100);
 5054 
 5055   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 5056   ins_encode %{
 5057     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 5058   %}
 5059   ins_pipe(pipe_slow);
 5060 %}
 5061 
 5062 instruct loadSSI(rRegI dst, stackSlotI src)
 5063 %{
 5064   match(Set dst src);
 5065 
 5066   ins_cost(125);
 5067   format %{ "movl    $dst, $src\t# int stk" %}
 5068   ins_encode %{
 5069     __ movl($dst$$Register, $src$$Address);
 5070   %}
 5071   ins_pipe(ialu_reg_mem);
 5072 %}
 5073 
 5074 instruct loadSSL(rRegL dst, stackSlotL src)
 5075 %{
 5076   match(Set dst src);
 5077 
 5078   ins_cost(125);
 5079   format %{ "movq    $dst, $src\t# long stk" %}
 5080   ins_encode %{
 5081     __ movq($dst$$Register, $src$$Address);
 5082   %}
 5083   ins_pipe(ialu_reg_mem);
 5084 %}
 5085 
 5086 instruct loadSSP(rRegP dst, stackSlotP src)
 5087 %{
 5088   match(Set dst src);
 5089 
 5090   ins_cost(125);
 5091   format %{ "movq    $dst, $src\t# ptr stk" %}
 5092   ins_encode %{
 5093     __ movq($dst$$Register, $src$$Address);
 5094   %}
 5095   ins_pipe(ialu_reg_mem);
 5096 %}
 5097 
 5098 instruct loadSSF(regF dst, stackSlotF src)
 5099 %{
 5100   match(Set dst src);
 5101 
 5102   ins_cost(125);
 5103   format %{ "movss   $dst, $src\t# float stk" %}
 5104   ins_encode %{
 5105     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 5106   %}
 5107   ins_pipe(pipe_slow); // XXX
 5108 %}
 5109 
 5110 // Use the same format since predicate() can not be used here.
 5111 instruct loadSSD(regD dst, stackSlotD src)
 5112 %{
 5113   match(Set dst src);
 5114 
 5115   ins_cost(125);
 5116   format %{ "movsd   $dst, $src\t# double stk" %}
 5117   ins_encode  %{
 5118     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 5119   %}
 5120   ins_pipe(pipe_slow); // XXX
 5121 %}
 5122 
 5123 // Prefetch instructions for allocation.
 5124 // Must be safe to execute with invalid address (cannot fault).
 5125 
 5126 instruct prefetchAlloc( memory mem ) %{
 5127   predicate(AllocatePrefetchInstr==3);
 5128   match(PrefetchAllocation mem);
 5129   ins_cost(125);
 5130 
 5131   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 5132   ins_encode %{
 5133     __ prefetchw($mem$$Address);
 5134   %}
 5135   ins_pipe(ialu_mem);
 5136 %}
 5137 
 5138 instruct prefetchAllocNTA( memory mem ) %{
 5139   predicate(AllocatePrefetchInstr==0);
 5140   match(PrefetchAllocation mem);
 5141   ins_cost(125);
 5142 
 5143   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 5144   ins_encode %{
 5145     __ prefetchnta($mem$$Address);
 5146   %}
 5147   ins_pipe(ialu_mem);
 5148 %}
 5149 
 5150 instruct prefetchAllocT0( memory mem ) %{
 5151   predicate(AllocatePrefetchInstr==1);
 5152   match(PrefetchAllocation mem);
 5153   ins_cost(125);
 5154 
 5155   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 5156   ins_encode %{
 5157     __ prefetcht0($mem$$Address);
 5158   %}
 5159   ins_pipe(ialu_mem);
 5160 %}
 5161 
 5162 instruct prefetchAllocT2( memory mem ) %{
 5163   predicate(AllocatePrefetchInstr==2);
 5164   match(PrefetchAllocation mem);
 5165   ins_cost(125);
 5166 
 5167   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 5168   ins_encode %{
 5169     __ prefetcht2($mem$$Address);
 5170   %}
 5171   ins_pipe(ialu_mem);
 5172 %}
 5173 
 5174 //----------Store Instructions-------------------------------------------------
 5175 
 5176 // Store Byte
 5177 instruct storeB(memory mem, rRegI src)
 5178 %{
 5179   match(Set mem (StoreB mem src));
 5180 
 5181   ins_cost(125); // XXX
 5182   format %{ "movb    $mem, $src\t# byte" %}
 5183   ins_encode %{
 5184     __ movb($mem$$Address, $src$$Register);
 5185   %}
 5186   ins_pipe(ialu_mem_reg);
 5187 %}
 5188 
 5189 // Store Char/Short
 5190 instruct storeC(memory mem, rRegI src)
 5191 %{
 5192   match(Set mem (StoreC mem src));
 5193 
 5194   ins_cost(125); // XXX
 5195   format %{ "movw    $mem, $src\t# char/short" %}
 5196   ins_encode %{
 5197     __ movw($mem$$Address, $src$$Register);
 5198   %}
 5199   ins_pipe(ialu_mem_reg);
 5200 %}
 5201 
 5202 // Store Integer
 5203 instruct storeI(memory mem, rRegI src)
 5204 %{
 5205   match(Set mem (StoreI mem src));
 5206 
 5207   ins_cost(125); // XXX
 5208   format %{ "movl    $mem, $src\t# int" %}
 5209   ins_encode %{
 5210     __ movl($mem$$Address, $src$$Register);
 5211   %}
 5212   ins_pipe(ialu_mem_reg);
 5213 %}
 5214 
 5215 // Store Long
 5216 instruct storeL(memory mem, rRegL src)
 5217 %{
 5218   match(Set mem (StoreL mem src));
 5219 
 5220   ins_cost(125); // XXX
 5221   format %{ "movq    $mem, $src\t# long" %}
 5222   ins_encode %{
 5223     __ movq($mem$$Address, $src$$Register);
 5224   %}
 5225   ins_pipe(ialu_mem_reg); // XXX
 5226 %}
 5227 
 5228 // Store Pointer
 5229 instruct storeP(memory mem, any_RegP src)
 5230 %{
 5231   predicate(n->as_Store()->barrier_data() == 0);
 5232   match(Set mem (StoreP mem src));
 5233 
 5234   ins_cost(125); // XXX
 5235   format %{ "movq    $mem, $src\t# ptr" %}
 5236   ins_encode %{
 5237     __ movq($mem$$Address, $src$$Register);
 5238   %}
 5239   ins_pipe(ialu_mem_reg);
 5240 %}
 5241 
 5242 instruct storeImmP0(memory mem, immP0 zero)
 5243 %{
 5244   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 5245   match(Set mem (StoreP mem zero));
 5246 
 5247   ins_cost(125); // XXX
 5248   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 5249   ins_encode %{
 5250     __ movq($mem$$Address, r12);
 5251   %}
 5252   ins_pipe(ialu_mem_reg);
 5253 %}
 5254 
// Store null pointer, mark word, or other simple pointer constant.
 5256 instruct storeImmP(memory mem, immP31 src)
 5257 %{
 5258   predicate(n->as_Store()->barrier_data() == 0);
 5259   match(Set mem (StoreP mem src));
 5260 
 5261   ins_cost(150); // XXX
 5262   format %{ "movq    $mem, $src\t# ptr" %}
 5263   ins_encode %{
 5264     __ movq($mem$$Address, $src$$constant);
 5265   %}
 5266   ins_pipe(ialu_mem_imm);
 5267 %}
 5268 
 5269 // Store Compressed Pointer
 5270 instruct storeN(memory mem, rRegN src)
 5271 %{
 5272   match(Set mem (StoreN mem src));
 5273 
 5274   ins_cost(125); // XXX
 5275   format %{ "movl    $mem, $src\t# compressed ptr" %}
 5276   ins_encode %{
 5277     __ movl($mem$$Address, $src$$Register);
 5278   %}
 5279   ins_pipe(ialu_mem_reg);
 5280 %}
 5281 
 5282 instruct storeNKlass(memory mem, rRegN src)
 5283 %{
 5284   match(Set mem (StoreNKlass mem src));
 5285 
 5286   ins_cost(125); // XXX
 5287   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 5288   ins_encode %{
 5289     __ movl($mem$$Address, $src$$Register);
 5290   %}
 5291   ins_pipe(ialu_mem_reg);
 5292 %}
 5293 
 5294 instruct storeImmN0(memory mem, immN0 zero)
 5295 %{
 5296   predicate(CompressedOops::base() == nullptr);
 5297   match(Set mem (StoreN mem zero));
 5298 
 5299   ins_cost(125); // XXX
 5300   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 5301   ins_encode %{
 5302     __ movl($mem$$Address, r12);
 5303   %}
 5304   ins_pipe(ialu_mem_reg);
 5305 %}
 5306 
 5307 instruct storeImmN(memory mem, immN src)
 5308 %{
 5309   match(Set mem (StoreN mem src));
 5310 
 5311   ins_cost(150); // XXX
 5312   format %{ "movl    $mem, $src\t# compressed ptr" %}
 5313   ins_encode %{
 5314     address con = (address)$src$$constant;
 5315     if (con == nullptr) {
 5316       __ movl($mem$$Address, 0);
 5317     } else {
 5318       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 5319     }
 5320   %}
 5321   ins_pipe(ialu_mem_imm);
 5322 %}
 5323 
 5324 instruct storeImmNKlass(memory mem, immNKlass src)
 5325 %{
 5326   match(Set mem (StoreNKlass mem src));
 5327 
 5328   ins_cost(150); // XXX
 5329   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 5330   ins_encode %{
 5331     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 5332   %}
 5333   ins_pipe(ialu_mem_imm);
 5334 %}
 5335 
 5336 // Store Integer Immediate
 5337 instruct storeImmI0(memory mem, immI_0 zero)
 5338 %{
 5339   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5340   match(Set mem (StoreI mem zero));
 5341 
 5342   ins_cost(125); // XXX
 5343   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 5344   ins_encode %{
 5345     __ movl($mem$$Address, r12);
 5346   %}
 5347   ins_pipe(ialu_mem_reg);
 5348 %}
 5349 
 5350 instruct storeImmI(memory mem, immI src)
 5351 %{
 5352   match(Set mem (StoreI mem src));
 5353 
 5354   ins_cost(150);
 5355   format %{ "movl    $mem, $src\t# int" %}
 5356   ins_encode %{
 5357     __ movl($mem$$Address, $src$$constant);
 5358   %}
 5359   ins_pipe(ialu_mem_imm);
 5360 %}
 5361 
 5362 // Store Long Immediate
 5363 instruct storeImmL0(memory mem, immL0 zero)
 5364 %{
 5365   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5366   match(Set mem (StoreL mem zero));
 5367 
 5368   ins_cost(125); // XXX
 5369   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 5370   ins_encode %{
 5371     __ movq($mem$$Address, r12);
 5372   %}
 5373   ins_pipe(ialu_mem_reg);
 5374 %}
 5375 
 5376 instruct storeImmL(memory mem, immL32 src)
 5377 %{
 5378   match(Set mem (StoreL mem src));
 5379 
 5380   ins_cost(150);
 5381   format %{ "movq    $mem, $src\t# long" %}
 5382   ins_encode %{
 5383     __ movq($mem$$Address, $src$$constant);
 5384   %}
 5385   ins_pipe(ialu_mem_imm);
 5386 %}
 5387 
 5388 // Store Short/Char Immediate
 5389 instruct storeImmC0(memory mem, immI_0 zero)
 5390 %{
 5391   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5392   match(Set mem (StoreC mem zero));
 5393 
 5394   ins_cost(125); // XXX
 5395   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 5396   ins_encode %{
 5397     __ movw($mem$$Address, r12);
 5398   %}
 5399   ins_pipe(ialu_mem_reg);
 5400 %}
 5401 
 5402 instruct storeImmI16(memory mem, immI16 src)
 5403 %{
 5404   predicate(UseStoreImmI16);
 5405   match(Set mem (StoreC mem src));
 5406 
 5407   ins_cost(150);
 5408   format %{ "movw    $mem, $src\t# short/char" %}
 5409   ins_encode %{
 5410     __ movw($mem$$Address, $src$$constant);
 5411   %}
 5412   ins_pipe(ialu_mem_imm);
 5413 %}
 5414 
 5415 // Store Byte Immediate
 5416 instruct storeImmB0(memory mem, immI_0 zero)
 5417 %{
 5418   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5419   match(Set mem (StoreB mem zero));
 5420 
 5421   ins_cost(125); // XXX
 5422   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 5423   ins_encode %{
 5424     __ movb($mem$$Address, r12);
 5425   %}
 5426   ins_pipe(ialu_mem_reg);
 5427 %}
 5428 
 5429 instruct storeImmB(memory mem, immI8 src)
 5430 %{
 5431   match(Set mem (StoreB mem src));
 5432 
 5433   ins_cost(150); // XXX
 5434   format %{ "movb    $mem, $src\t# byte" %}
 5435   ins_encode %{
 5436     __ movb($mem$$Address, $src$$constant);
 5437   %}
 5438   ins_pipe(ialu_mem_imm);
 5439 %}
 5440 
 5441 // Store CMS card-mark Immediate
 5442 instruct storeImmCM0_reg(memory mem, immI_0 zero)
 5443 %{
 5444   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5445   match(Set mem (StoreCM mem zero));
 5446 
 5447   ins_cost(125); // XXX
 5448   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
 5449   ins_encode %{
 5450     __ movb($mem$$Address, r12);
 5451   %}
 5452   ins_pipe(ialu_mem_reg);
 5453 %}
 5454 
 5455 instruct storeImmCM0(memory mem, immI_0 src)
 5456 %{
 5457   match(Set mem (StoreCM mem src));
 5458 
 5459   ins_cost(150); // XXX
 5460   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
 5461   ins_encode %{
 5462     __ movb($mem$$Address, $src$$constant);
 5463   %}
 5464   ins_pipe(ialu_mem_imm);
 5465 %}
 5466 
 5467 // Store Float
 5468 instruct storeF(memory mem, regF src)
 5469 %{
 5470   match(Set mem (StoreF mem src));
 5471 
 5472   ins_cost(95); // XXX
 5473   format %{ "movss   $mem, $src\t# float" %}
 5474   ins_encode %{
 5475     __ movflt($mem$$Address, $src$$XMMRegister);
 5476   %}
 5477   ins_pipe(pipe_slow); // XXX
 5478 %}
 5479 
 5480 // Store immediate Float value (it is faster than store from XMM register)
 5481 instruct storeF0(memory mem, immF0 zero)
 5482 %{
 5483   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5484   match(Set mem (StoreF mem zero));
 5485 
 5486   ins_cost(25); // XXX
 5487   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 5488   ins_encode %{
 5489     __ movl($mem$$Address, r12);
 5490   %}
 5491   ins_pipe(ialu_mem_reg);
 5492 %}
 5493 
 5494 instruct storeF_imm(memory mem, immF src)
 5495 %{
 5496   match(Set mem (StoreF mem src));
 5497 
 5498   ins_cost(50);
 5499   format %{ "movl    $mem, $src\t# float" %}
 5500   ins_encode %{
 5501     __ movl($mem$$Address, jint_cast($src$$constant));
 5502   %}
 5503   ins_pipe(ialu_mem_imm);
 5504 %}
 5505 
 5506 // Store Double
 5507 instruct storeD(memory mem, regD src)
 5508 %{
 5509   match(Set mem (StoreD mem src));
 5510 
 5511   ins_cost(95); // XXX
 5512   format %{ "movsd   $mem, $src\t# double" %}
 5513   ins_encode %{
 5514     __ movdbl($mem$$Address, $src$$XMMRegister);
 5515   %}
 5516   ins_pipe(pipe_slow); // XXX
 5517 %}
 5518 
 5519 // Store immediate double 0.0 (it is faster than store from XMM register)
 5520 instruct storeD0_imm(memory mem, immD0 src)
 5521 %{
 5522   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 5523   match(Set mem (StoreD mem src));
 5524 
 5525   ins_cost(50);
 5526   format %{ "movq    $mem, $src\t# double 0." %}
 5527   ins_encode %{
 5528     __ movq($mem$$Address, $src$$constant);
 5529   %}
 5530   ins_pipe(ialu_mem_imm);
 5531 %}
 5532 
 5533 instruct storeD0(memory mem, immD0 zero)
 5534 %{
 5535   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 5536   match(Set mem (StoreD mem zero));
 5537 
 5538   ins_cost(25); // XXX
 5539   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 5540   ins_encode %{
 5541     __ movq($mem$$Address, r12);
 5542   %}
 5543   ins_pipe(ialu_mem_reg);
 5544 %}
 5545 
 5546 instruct storeSSI(stackSlotI dst, rRegI src)
 5547 %{
 5548   match(Set dst src);
 5549 
 5550   ins_cost(100);
 5551   format %{ "movl    $dst, $src\t# int stk" %}
 5552   ins_encode %{
 5553     __ movl($dst$$Address, $src$$Register);
 5554   %}
 5555   ins_pipe( ialu_mem_reg );
 5556 %}
 5557 
 5558 instruct storeSSL(stackSlotL dst, rRegL src)
 5559 %{
 5560   match(Set dst src);
 5561 
 5562   ins_cost(100);
 5563   format %{ "movq    $dst, $src\t# long stk" %}
 5564   ins_encode %{
 5565     __ movq($dst$$Address, $src$$Register);
 5566   %}
 5567   ins_pipe(ialu_mem_reg);
 5568 %}
 5569 
 5570 instruct storeSSP(stackSlotP dst, rRegP src)
 5571 %{
 5572   match(Set dst src);
 5573 
 5574   ins_cost(100);
 5575   format %{ "movq    $dst, $src\t# ptr stk" %}
 5576   ins_encode %{
 5577     __ movq($dst$$Address, $src$$Register);
 5578   %}
 5579   ins_pipe(ialu_mem_reg);
 5580 %}
 5581 
 5582 instruct storeSSF(stackSlotF dst, regF src)
 5583 %{
 5584   match(Set dst src);
 5585 
 5586   ins_cost(95); // XXX
 5587   format %{ "movss   $dst, $src\t# float stk" %}
 5588   ins_encode %{
 5589     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 5590   %}
 5591   ins_pipe(pipe_slow); // XXX
 5592 %}
 5593 
 5594 instruct storeSSD(stackSlotD dst, regD src)
 5595 %{
 5596   match(Set dst src);
 5597 
 5598   ins_cost(95); // XXX
 5599   format %{ "movsd   $dst, $src\t# double stk" %}
 5600   ins_encode %{
 5601     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 5602   %}
 5603   ins_pipe(pipe_slow); // XXX
 5604 %}
 5605 
 5606 instruct cacheWB(indirect addr)
 5607 %{
 5608   predicate(VM_Version::supports_data_cache_line_flush());
 5609   match(CacheWB addr);
 5610 
 5611   ins_cost(100);
 5612   format %{"cache wb $addr" %}
 5613   ins_encode %{
 5614     assert($addr->index_position() < 0, "should be");
 5615     assert($addr$$disp == 0, "should be");
 5616     __ cache_wb(Address($addr$$base$$Register, 0));
 5617   %}
 5618   ins_pipe(pipe_slow); // XXX
 5619 %}
 5620 
 5621 instruct cacheWBPreSync()
 5622 %{
 5623   predicate(VM_Version::supports_data_cache_line_flush());
 5624   match(CacheWBPreSync);
 5625 
 5626   ins_cost(100);
 5627   format %{"cache wb presync" %}
 5628   ins_encode %{
 5629     __ cache_wbsync(true);
 5630   %}
 5631   ins_pipe(pipe_slow); // XXX
 5632 %}
 5633 
 5634 instruct cacheWBPostSync()
 5635 %{
 5636   predicate(VM_Version::supports_data_cache_line_flush());
 5637   match(CacheWBPostSync);
 5638 
 5639   ins_cost(100);
 5640   format %{"cache wb postsync" %}
 5641   ins_encode %{
 5642     __ cache_wbsync(false);
 5643   %}
 5644   ins_pipe(pipe_slow); // XXX
 5645 %}
 5646 
 5647 //----------BSWAP Instructions-------------------------------------------------
 5648 instruct bytes_reverse_int(rRegI dst) %{
 5649   match(Set dst (ReverseBytesI dst));
 5650 
 5651   format %{ "bswapl  $dst" %}
 5652   ins_encode %{
 5653     __ bswapl($dst$$Register);
 5654   %}
 5655   ins_pipe( ialu_reg );
 5656 %}
 5657 
 5658 instruct bytes_reverse_long(rRegL dst) %{
 5659   match(Set dst (ReverseBytesL dst));
 5660 
 5661   format %{ "bswapq  $dst" %}
 5662   ins_encode %{
 5663     __ bswapq($dst$$Register);
 5664   %}
 5665   ins_pipe( ialu_reg);
 5666 %}
 5667 
 5668 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 5669   match(Set dst (ReverseBytesUS dst));
 5670   effect(KILL cr);
 5671 
 5672   format %{ "bswapl  $dst\n\t"
 5673             "shrl    $dst,16\n\t" %}
 5674   ins_encode %{
 5675     __ bswapl($dst$$Register);
 5676     __ shrl($dst$$Register, 16);
 5677   %}
 5678   ins_pipe( ialu_reg );
 5679 %}
 5680 
 5681 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 5682   match(Set dst (ReverseBytesS dst));
 5683   effect(KILL cr);
 5684 
 5685   format %{ "bswapl  $dst\n\t"
 5686             "sar     $dst,16\n\t" %}
 5687   ins_encode %{
 5688     __ bswapl($dst$$Register);
 5689     __ sarl($dst$$Register, 16);
 5690   %}
 5691   ins_pipe( ialu_reg );
 5692 %}
 5693 
 5694 //---------- Zeros Count Instructions ------------------------------------------
 5695 
 5696 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 5697   predicate(UseCountLeadingZerosInstruction);
 5698   match(Set dst (CountLeadingZerosI src));
 5699   effect(KILL cr);
 5700 
 5701   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 5702   ins_encode %{
 5703     __ lzcntl($dst$$Register, $src$$Register);
 5704   %}
 5705   ins_pipe(ialu_reg);
 5706 %}
 5707 
 5708 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 5709   predicate(UseCountLeadingZerosInstruction);
 5710   match(Set dst (CountLeadingZerosI (LoadI src)));
 5711   effect(KILL cr);
 5712   ins_cost(175);
 5713   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 5714   ins_encode %{
 5715     __ lzcntl($dst$$Register, $src$$Address);
 5716   %}
 5717   ins_pipe(ialu_reg_mem);
 5718 %}
 5719 
 5720 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 5721   predicate(!UseCountLeadingZerosInstruction);
 5722   match(Set dst (CountLeadingZerosI src));
 5723   effect(KILL cr);
 5724 
 5725   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 5726             "jnz     skip\n\t"
 5727             "movl    $dst, -1\n"
 5728       "skip:\n\t"
 5729             "negl    $dst\n\t"
 5730             "addl    $dst, 31" %}
 5731   ins_encode %{
 5732     Register Rdst = $dst$$Register;
 5733     Register Rsrc = $src$$Register;
 5734     Label skip;
 5735     __ bsrl(Rdst, Rsrc);
 5736     __ jccb(Assembler::notZero, skip);
 5737     __ movl(Rdst, -1);
 5738     __ bind(skip);
 5739     __ negl(Rdst);
 5740     __ addl(Rdst, BitsPerInt - 1);
 5741   %}
 5742   ins_pipe(ialu_reg);
 5743 %}
 5744 
 5745 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 5746   predicate(UseCountLeadingZerosInstruction);
 5747   match(Set dst (CountLeadingZerosL src));
 5748   effect(KILL cr);
 5749 
 5750   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 5751   ins_encode %{
 5752     __ lzcntq($dst$$Register, $src$$Register);
 5753   %}
 5754   ins_pipe(ialu_reg);
 5755 %}
 5756 
 5757 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 5758   predicate(UseCountLeadingZerosInstruction);
 5759   match(Set dst (CountLeadingZerosL (LoadL src)));
 5760   effect(KILL cr);
 5761   ins_cost(175);
 5762   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 5763   ins_encode %{
 5764     __ lzcntq($dst$$Register, $src$$Address);
 5765   %}
 5766   ins_pipe(ialu_reg_mem);
 5767 %}
 5768 
 5769 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 5770   predicate(!UseCountLeadingZerosInstruction);
 5771   match(Set dst (CountLeadingZerosL src));
 5772   effect(KILL cr);
 5773 
 5774   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 5775             "jnz     skip\n\t"
 5776             "movl    $dst, -1\n"
 5777       "skip:\n\t"
 5778             "negl    $dst\n\t"
 5779             "addl    $dst, 63" %}
 5780   ins_encode %{
 5781     Register Rdst = $dst$$Register;
 5782     Register Rsrc = $src$$Register;
 5783     Label skip;
 5784     __ bsrq(Rdst, Rsrc);
 5785     __ jccb(Assembler::notZero, skip);
 5786     __ movl(Rdst, -1);
 5787     __ bind(skip);
 5788     __ negl(Rdst);
 5789     __ addl(Rdst, BitsPerLong - 1);
 5790   %}
 5791   ins_pipe(ialu_reg);
 5792 %}
 5793 
 5794 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 5795   predicate(UseCountTrailingZerosInstruction);
 5796   match(Set dst (CountTrailingZerosI src));
 5797   effect(KILL cr);
 5798 
 5799   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 5800   ins_encode %{
 5801     __ tzcntl($dst$$Register, $src$$Register);
 5802   %}
 5803   ins_pipe(ialu_reg);
 5804 %}
 5805 
 5806 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 5807   predicate(UseCountTrailingZerosInstruction);
 5808   match(Set dst (CountTrailingZerosI (LoadI src)));
 5809   effect(KILL cr);
 5810   ins_cost(175);
 5811   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 5812   ins_encode %{
 5813     __ tzcntl($dst$$Register, $src$$Address);
 5814   %}
 5815   ins_pipe(ialu_reg_mem);
 5816 %}
 5817 
 5818 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 5819   predicate(!UseCountTrailingZerosInstruction);
 5820   match(Set dst (CountTrailingZerosI src));
 5821   effect(KILL cr);
 5822 
 5823   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 5824             "jnz     done\n\t"
 5825             "movl    $dst, 32\n"
 5826       "done:" %}
 5827   ins_encode %{
 5828     Register Rdst = $dst$$Register;
 5829     Label done;
 5830     __ bsfl(Rdst, $src$$Register);
 5831     __ jccb(Assembler::notZero, done);
 5832     __ movl(Rdst, BitsPerInt);
 5833     __ bind(done);
 5834   %}
 5835   ins_pipe(ialu_reg);
 5836 %}
 5837 
 5838 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 5839   predicate(UseCountTrailingZerosInstruction);
 5840   match(Set dst (CountTrailingZerosL src));
 5841   effect(KILL cr);
 5842 
 5843   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 5844   ins_encode %{
 5845     __ tzcntq($dst$$Register, $src$$Register);
 5846   %}
 5847   ins_pipe(ialu_reg);
 5848 %}
 5849 
 5850 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 5851   predicate(UseCountTrailingZerosInstruction);
 5852   match(Set dst (CountTrailingZerosL (LoadL src)));
 5853   effect(KILL cr);
 5854   ins_cost(175);
 5855   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 5856   ins_encode %{
 5857     __ tzcntq($dst$$Register, $src$$Address);
 5858   %}
 5859   ins_pipe(ialu_reg_mem);
 5860 %}
 5861 
 5862 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 5863   predicate(!UseCountTrailingZerosInstruction);
 5864   match(Set dst (CountTrailingZerosL src));
 5865   effect(KILL cr);
 5866 
 5867   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 5868             "jnz     done\n\t"
 5869             "movl    $dst, 64\n"
 5870       "done:" %}
 5871   ins_encode %{
 5872     Register Rdst = $dst$$Register;
 5873     Label done;
 5874     __ bsfq(Rdst, $src$$Register);
 5875     __ jccb(Assembler::notZero, done);
 5876     __ movl(Rdst, BitsPerLong);
 5877     __ bind(done);
 5878   %}
 5879   ins_pipe(ialu_reg);
 5880 %}
 5881 
 5882 //--------------- Reverse Operation Instructions ----------------
 5883 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 5884   predicate(!VM_Version::supports_gfni());
 5885   match(Set dst (ReverseI src));
 5886   effect(TEMP dst, TEMP rtmp, KILL cr);
 5887   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 5888   ins_encode %{
 5889     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 5890   %}
 5891   ins_pipe( ialu_reg );
 5892 %}
 5893 
 5894 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 5895   predicate(VM_Version::supports_gfni());
 5896   match(Set dst (ReverseI src));
 5897   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 5898   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 5899   ins_encode %{
 5900     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 5901   %}
 5902   ins_pipe( ialu_reg );
 5903 %}
 5904 
 5905 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 5906   predicate(!VM_Version::supports_gfni());
 5907   match(Set dst (ReverseL src));
 5908   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 5909   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 5910   ins_encode %{
 5911     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 5912   %}
 5913   ins_pipe( ialu_reg );
 5914 %}
 5915 
 5916 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 5917   predicate(VM_Version::supports_gfni());
 5918   match(Set dst (ReverseL src));
 5919   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 5920   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 5921   ins_encode %{
 5922     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 5923   %}
 5924   ins_pipe( ialu_reg );
 5925 %}
 5926 
 5927 //---------- Population Count Instructions -------------------------------------
 5928 
 5929 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 5930   predicate(UsePopCountInstruction);
 5931   match(Set dst (PopCountI src));
 5932   effect(KILL cr);
 5933 
 5934   format %{ "popcnt  $dst, $src" %}
 5935   ins_encode %{
 5936     __ popcntl($dst$$Register, $src$$Register);
 5937   %}
 5938   ins_pipe(ialu_reg);
 5939 %}
 5940 
 5941 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 5942   predicate(UsePopCountInstruction);
 5943   match(Set dst (PopCountI (LoadI mem)));
 5944   effect(KILL cr);
 5945 
 5946   format %{ "popcnt  $dst, $mem" %}
 5947   ins_encode %{
 5948     __ popcntl($dst$$Register, $mem$$Address);
 5949   %}
 5950   ins_pipe(ialu_reg);
 5951 %}
 5952 
 5953 // Note: Long.bitCount(long) returns an int.
 5954 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 5955   predicate(UsePopCountInstruction);
 5956   match(Set dst (PopCountL src));
 5957   effect(KILL cr);
 5958 
 5959   format %{ "popcnt  $dst, $src" %}
 5960   ins_encode %{
 5961     __ popcntq($dst$$Register, $src$$Register);
 5962   %}
 5963   ins_pipe(ialu_reg);
 5964 %}
 5965 
 5966 // Note: Long.bitCount(long) returns an int.
 5967 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 5968   predicate(UsePopCountInstruction);
 5969   match(Set dst (PopCountL (LoadL mem)));
 5970   effect(KILL cr);
 5971 
 5972   format %{ "popcnt  $dst, $mem" %}
 5973   ins_encode %{
 5974     __ popcntq($dst$$Register, $mem$$Address);
 5975   %}
 5976   ins_pipe(ialu_reg);
 5977 %}
 5978 
 5979 
 5980 //----------MemBar Instructions-----------------------------------------------
 5981 // Memory barrier flavors
 5982 
 5983 instruct membar_acquire()
 5984 %{
 5985   match(MemBarAcquire);
 5986   match(LoadFence);
 5987   ins_cost(0);
 5988 
 5989   size(0);
 5990   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 5991   ins_encode();
 5992   ins_pipe(empty);
 5993 %}
 5994 
 5995 instruct membar_acquire_lock()
 5996 %{
 5997   match(MemBarAcquireLock);
 5998   ins_cost(0);
 5999 
 6000   size(0);
 6001   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6002   ins_encode();
 6003   ins_pipe(empty);
 6004 %}
 6005 
 6006 instruct membar_release()
 6007 %{
 6008   match(MemBarRelease);
 6009   match(StoreFence);
 6010   ins_cost(0);
 6011 
 6012   size(0);
 6013   format %{ "MEMBAR-release ! (empty encoding)" %}
 6014   ins_encode();
 6015   ins_pipe(empty);
 6016 %}
 6017 
 6018 instruct membar_release_lock()
 6019 %{
 6020   match(MemBarReleaseLock);
 6021   ins_cost(0);
 6022 
 6023   size(0);
 6024   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6025   ins_encode();
 6026   ins_pipe(empty);
 6027 %}
 6028 
 6029 instruct membar_volatile(rFlagsReg cr) %{
 6030   match(MemBarVolatile);
 6031   effect(KILL cr);
 6032   ins_cost(400);
 6033 
 6034   format %{
 6035     $$template
 6036     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 6037   %}
 6038   ins_encode %{
 6039     __ membar(Assembler::StoreLoad);
 6040   %}
 6041   ins_pipe(pipe_slow);
 6042 %}
 6043 
 6044 instruct unnecessary_membar_volatile()
 6045 %{
 6046   match(MemBarVolatile);
 6047   predicate(Matcher::post_store_load_barrier(n));
 6048   ins_cost(0);
 6049 
 6050   size(0);
 6051   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6052   ins_encode();
 6053   ins_pipe(empty);
 6054 %}
 6055 
 6056 instruct membar_storestore() %{
 6057   match(MemBarStoreStore);
 6058   match(StoreStoreFence);
 6059   ins_cost(0);
 6060 
 6061   size(0);
 6062   format %{ "MEMBAR-storestore (empty encoding)" %}
 6063   ins_encode( );
 6064   ins_pipe(empty);
 6065 %}
 6066 
 6067 //----------Move Instructions--------------------------------------------------
 6068 
 6069 instruct castX2P(rRegP dst, rRegL src)
 6070 %{
 6071   match(Set dst (CastX2P src));
 6072 
 6073   format %{ "movq    $dst, $src\t# long->ptr" %}
 6074   ins_encode %{
 6075     if ($dst$$reg != $src$$reg) {
 6076       __ movptr($dst$$Register, $src$$Register);
 6077     }
 6078   %}
 6079   ins_pipe(ialu_reg_reg); // XXX
 6080 %}
 6081 
 6082 instruct castN2X(rRegL dst, rRegN src)
 6083 %{
 6084   match(Set dst (CastP2X src));
 6085 
 6086   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6087   ins_encode %{
 6088     if ($dst$$reg != $src$$reg) {
 6089       __ movptr($dst$$Register, $src$$Register);
 6090     }
 6091   %}
 6092   ins_pipe(ialu_reg_reg); // XXX
 6093 %}
 6094 
 6095 instruct castP2X(rRegL dst, rRegP src)
 6096 %{
 6097   match(Set dst (CastP2X src));
 6098 
 6099   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6100   ins_encode %{
 6101     if ($dst$$reg != $src$$reg) {
 6102       __ movptr($dst$$Register, $src$$Register);
 6103     }
 6104   %}
 6105   ins_pipe(ialu_reg_reg); // XXX
 6106 %}
 6107 
 6108 // Convert oop into int for vectors alignment masking
 6109 instruct convP2I(rRegI dst, rRegP src)
 6110 %{
 6111   match(Set dst (ConvL2I (CastP2X src)));
 6112 
 6113   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6114   ins_encode %{
 6115     __ movl($dst$$Register, $src$$Register);
 6116   %}
 6117   ins_pipe(ialu_reg_reg); // XXX
 6118 %}
 6119 
 6120 // Convert compressed oop into int for vectors alignment masking
 6121 // in case of 32bit oops (heap < 4Gb).
 6122 instruct convN2I(rRegI dst, rRegN src)
 6123 %{
 6124   predicate(CompressedOops::shift() == 0);
 6125   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6126 
 6127   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 6128   ins_encode %{
 6129     __ movl($dst$$Register, $src$$Register);
 6130   %}
 6131   ins_pipe(ialu_reg_reg); // XXX
 6132 %}
 6133 
 6134 // Convert oop pointer into compressed form
 6135 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 6136   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 6137   match(Set dst (EncodeP src));
 6138   effect(KILL cr);
 6139   format %{ "encode_heap_oop $dst,$src" %}
 6140   ins_encode %{
 6141     Register s = $src$$Register;
 6142     Register d = $dst$$Register;
 6143     if (s != d) {
 6144       __ movq(d, s);
 6145     }
 6146     __ encode_heap_oop(d);
 6147   %}
 6148   ins_pipe(ialu_reg_long);
 6149 %}
 6150 
 6151 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 6152   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 6153   match(Set dst (EncodeP src));
 6154   effect(KILL cr);
 6155   format %{ "encode_heap_oop_not_null $dst,$src" %}
 6156   ins_encode %{
 6157     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 6158   %}
 6159   ins_pipe(ialu_reg_long);
 6160 %}
 6161 
 6162 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 6163   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 6164             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 6165   match(Set dst (DecodeN src));
 6166   effect(KILL cr);
 6167   format %{ "decode_heap_oop $dst,$src" %}
 6168   ins_encode %{
 6169     Register s = $src$$Register;
 6170     Register d = $dst$$Register;
 6171     if (s != d) {
 6172       __ movq(d, s);
 6173     }
 6174     __ decode_heap_oop(d);
 6175   %}
 6176   ins_pipe(ialu_reg_long);
 6177 %}
 6178 
 6179 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 6180   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 6181             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 6182   match(Set dst (DecodeN src));
 6183   effect(KILL cr);
 6184   format %{ "decode_heap_oop_not_null $dst,$src" %}
 6185   ins_encode %{
 6186     Register s = $src$$Register;
 6187     Register d = $dst$$Register;
 6188     if (s != d) {
 6189       __ decode_heap_oop_not_null(d, s);
 6190     } else {
 6191       __ decode_heap_oop_not_null(d);
 6192     }
 6193   %}
 6194   ins_pipe(ialu_reg_long);
 6195 %}
 6196 
 6197 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 6198   match(Set dst (EncodePKlass src));
 6199   effect(TEMP dst, KILL cr);
 6200   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 6201   ins_encode %{
 6202     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 6203   %}
 6204   ins_pipe(ialu_reg_long);
 6205 %}
 6206 
 6207 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 6208   match(Set dst (DecodeNKlass src));
 6209   effect(TEMP dst, KILL cr);
 6210   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 6211   ins_encode %{
 6212     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 6213   %}
 6214   ins_pipe(ialu_reg_long);
 6215 %}
 6216 
 6217 //----------Conditional Move---------------------------------------------------
 6218 // Jump
 6219 // dummy instruction for generating temp registers
 6220 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 6221   match(Jump (LShiftL switch_val shift));
 6222   ins_cost(350);
 6223   predicate(false);
 6224   effect(TEMP dest);
 6225 
 6226   format %{ "leaq    $dest, [$constantaddress]\n\t"
 6227             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 6228   ins_encode %{
 6229     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 6230     // to do that and the compiler is using that register as one it can allocate.
 6231     // So we build it all by hand.
 6232     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 6233     // ArrayAddress dispatch(table, index);
 6234     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 6235     __ lea($dest$$Register, $constantaddress);
 6236     __ jmp(dispatch);
 6237   %}
 6238   ins_pipe(pipe_jmp);
 6239 %}
 6240 
 6241 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 6242   match(Jump (AddL (LShiftL switch_val shift) offset));
 6243   ins_cost(350);
 6244   effect(TEMP dest);
 6245 
 6246   format %{ "leaq    $dest, [$constantaddress]\n\t"
 6247             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 6248   ins_encode %{
 6249     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 6250     // to do that and the compiler is using that register as one it can allocate.
 6251     // So we build it all by hand.
 6252     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 6253     // ArrayAddress dispatch(table, index);
 6254     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 6255     __ lea($dest$$Register, $constantaddress);
 6256     __ jmp(dispatch);
 6257   %}
 6258   ins_pipe(pipe_jmp);
 6259 %}
 6260 
 6261 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 6262   match(Jump switch_val);
 6263   ins_cost(350);
 6264   effect(TEMP dest);
 6265 
 6266   format %{ "leaq    $dest, [$constantaddress]\n\t"
 6267             "jmp     [$dest + $switch_val]\n\t" %}
 6268   ins_encode %{
 6269     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 6270     // to do that and the compiler is using that register as one it can allocate.
 6271     // So we build it all by hand.
 6272     // Address index(noreg, switch_reg, Address::times_1);
 6273     // ArrayAddress dispatch(table, index);
 6274     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 6275     __ lea($dest$$Register, $constantaddress);
 6276     __ jmp(dispatch);
 6277   %}
 6278   ins_pipe(pipe_jmp);
 6279 %}
 6280 
 6281 // Conditional move
 6282 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 6283 %{
 6284   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 6285   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 6286 
 6287   ins_cost(100); // XXX
 6288   format %{ "setbn$cop $dst\t# signed, int" %}
 6289   ins_encode %{
 6290     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 6291     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 6292   %}
 6293   ins_pipe(ialu_reg);
 6294 %}
 6295 
 6296 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 6297 %{
 6298   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6299 
 6300   ins_cost(200); // XXX
 6301   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 6302   ins_encode %{
 6303     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 6304   %}
 6305   ins_pipe(pipe_cmov_reg);
 6306 %}
 6307 
 6308 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 6309 %{
 6310   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 6311   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 6312 
 6313   ins_cost(100); // XXX
 6314   format %{ "setbn$cop $dst\t# unsigned, int" %}
 6315   ins_encode %{
 6316     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 6317     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 6318   %}
 6319   ins_pipe(ialu_reg);
 6320 %}
 6321 
 6322 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 6323   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6324 
 6325   ins_cost(200); // XXX
 6326   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 6327   ins_encode %{
 6328     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 6329   %}
 6330   ins_pipe(pipe_cmov_reg);
 6331 %}
 6332 
 6333 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 6334 %{
 6335   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 6336   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 6337 
 6338   ins_cost(100); // XXX
 6339   format %{ "setbn$cop $dst\t# unsigned, int" %}
 6340   ins_encode %{
 6341     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 6342     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 6343   %}
 6344   ins_pipe(ialu_reg);
 6345 %}
 6346 
 6347 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 6348   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6349   ins_cost(200);
 6350   expand %{
 6351     cmovI_regU(cop, cr, dst, src);
 6352   %}
 6353 %}
 6354 
 6355 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 6356   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 6357   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6358 
 6359   ins_cost(200); // XXX
 6360   format %{ "cmovpl  $dst, $src\n\t"
 6361             "cmovnel $dst, $src" %}
 6362   ins_encode %{
 6363     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 6364     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 6365   %}
 6366   ins_pipe(pipe_cmov_reg);
 6367 %}
 6368 
 6369 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 6370 // inputs of the CMove
 6371 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 6372   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 6373   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 6374 
 6375   ins_cost(200); // XXX
 6376   format %{ "cmovpl  $dst, $src\n\t"
 6377             "cmovnel $dst, $src" %}
 6378   ins_encode %{
 6379     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 6380     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 6381   %}
 6382   ins_pipe(pipe_cmov_reg);
 6383 %}
 6384 
 6385 // Conditional move
 6386 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 6387   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6388 
 6389   ins_cost(250); // XXX
 6390   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 6391   ins_encode %{
 6392     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 6393   %}
 6394   ins_pipe(pipe_cmov_mem);
 6395 %}
 6396 
 6397 // Conditional move
 6398 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 6399 %{
 6400   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6401 
 6402   ins_cost(250); // XXX
 6403   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 6404   ins_encode %{
 6405     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 6406   %}
 6407   ins_pipe(pipe_cmov_mem);
 6408 %}
 6409 
 6410 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 6411   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6412   ins_cost(250);
 6413   expand %{
 6414     cmovI_memU(cop, cr, dst, src);
 6415   %}
 6416 %}
 6417 
 6418 // Conditional move
 6419 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 6420 %{
 6421   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 6422 
 6423   ins_cost(200); // XXX
 6424   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 6425   ins_encode %{
 6426     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 6427   %}
 6428   ins_pipe(pipe_cmov_reg);
 6429 %}
 6430 
 6431 // Conditional move
 6432 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 6433 %{
 6434   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 6435 
 6436   ins_cost(200); // XXX
 6437   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 6438   ins_encode %{
 6439     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 6440   %}
 6441   ins_pipe(pipe_cmov_reg);
 6442 %}
 6443 
 6444 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 6445   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 6446   ins_cost(200);
 6447   expand %{
 6448     cmovN_regU(cop, cr, dst, src);
 6449   %}
 6450 %}
 6451 
 6452 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 6453   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 6454   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 6455 
 6456   ins_cost(200); // XXX
 6457   format %{ "cmovpl  $dst, $src\n\t"
 6458             "cmovnel $dst, $src" %}
 6459   ins_encode %{
 6460     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 6461     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 6462   %}
 6463   ins_pipe(pipe_cmov_reg);
 6464 %}
 6465 
 6466 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 6467 // inputs of the CMove
 6468 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 6469   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 6470   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 6471 
 6472   ins_cost(200); // XXX
 6473   format %{ "cmovpl  $dst, $src\n\t"
 6474             "cmovnel $dst, $src" %}
 6475   ins_encode %{
 6476     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 6477     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 6478   %}
 6479   ins_pipe(pipe_cmov_reg);
 6480 %}
 6481 
 6482 // Conditional move
 6483 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 6484 %{
 6485   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6486 
 6487   ins_cost(200); // XXX
 6488   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 6489   ins_encode %{
 6490     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 6491   %}
 6492   ins_pipe(pipe_cmov_reg);  // XXX
 6493 %}
 6494 
 6495 // Conditional move
 6496 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 6497 %{
 6498   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6499 
 6500   ins_cost(200); // XXX
 6501   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 6502   ins_encode %{
 6503     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 6504   %}
 6505   ins_pipe(pipe_cmov_reg); // XXX
 6506 %}
 6507 
 6508 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 6509   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6510   ins_cost(200);
 6511   expand %{
 6512     cmovP_regU(cop, cr, dst, src);
 6513   %}
 6514 %}
 6515 
 6516 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 6517   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 6518   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6519 
 6520   ins_cost(200); // XXX
 6521   format %{ "cmovpq  $dst, $src\n\t"
 6522             "cmovneq $dst, $src" %}
 6523   ins_encode %{
 6524     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 6525     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 6526   %}
 6527   ins_pipe(pipe_cmov_reg);
 6528 %}
 6529 
 6530 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 6531 // inputs of the CMove
 6532 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 6533   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 6534   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 6535 
 6536   ins_cost(200); // XXX
 6537   format %{ "cmovpq  $dst, $src\n\t"
 6538             "cmovneq $dst, $src" %}
 6539   ins_encode %{
 6540     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 6541     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 6542   %}
 6543   ins_pipe(pipe_cmov_reg);
 6544 %}
 6545 
 6546 instruct cmovL_imm_01(rRegL dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 6547 %{
 6548   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 6549   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 6550 
 6551   ins_cost(100); // XXX
 6552   format %{ "setbn$cop $dst\t# signed, long" %}
 6553   ins_encode %{
 6554     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 6555     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 6556   %}
 6557   ins_pipe(ialu_reg);
 6558 %}
 6559 
 6560 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 6561 %{
 6562   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6563 
 6564   ins_cost(200); // XXX
 6565   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 6566   ins_encode %{
 6567     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 6568   %}
 6569   ins_pipe(pipe_cmov_reg);  // XXX
 6570 %}
 6571 
 6572 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 6573 %{
 6574   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 6575 
 6576   ins_cost(200); // XXX
 6577   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 6578   ins_encode %{
 6579     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 6580   %}
 6581   ins_pipe(pipe_cmov_mem);  // XXX
 6582 %}
 6583 
 6584 instruct cmovL_imm_01U(rRegL dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 6585 %{
 6586   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 6587   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 6588 
 6589   ins_cost(100); // XXX
 6590   format %{ "setbn$cop $dst\t# unsigned, long" %}
 6591   ins_encode %{
 6592     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 6593     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 6594   %}
 6595   ins_pipe(ialu_reg);
 6596 %}
 6597 
 6598 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 6599 %{
 6600   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6601 
 6602   ins_cost(200); // XXX
 6603   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 6604   ins_encode %{
 6605     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 6606   %}
 6607   ins_pipe(pipe_cmov_reg); // XXX
 6608 %}
 6609 
 6610 instruct cmovL_imm_01UCF(rRegL dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 6611 %{
 6612   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 6613   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 6614 
 6615   ins_cost(100); // XXX
 6616   format %{ "setbn$cop $dst\t# unsigned, long" %}
 6617   ins_encode %{
 6618     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 6619     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 6620   %}
 6621   ins_pipe(ialu_reg);
 6622 %}
 6623 
 6624 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 6625   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6626   ins_cost(200);
 6627   expand %{
 6628     cmovL_regU(cop, cr, dst, src);
 6629   %}
 6630 %}
 6631 
 6632 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 6633   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 6634   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6635 
 6636   ins_cost(200); // XXX
 6637   format %{ "cmovpq  $dst, $src\n\t"
 6638             "cmovneq $dst, $src" %}
 6639   ins_encode %{
 6640     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 6641     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 6642   %}
 6643   ins_pipe(pipe_cmov_reg);
 6644 %}
 6645 
 6646 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 6647 // inputs of the CMove
 6648 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 6649   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 6650   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 6651 
 6652   ins_cost(200); // XXX
 6653   format %{ "cmovpq  $dst, $src\n\t"
 6654             "cmovneq $dst, $src" %}
 6655   ins_encode %{
 6656     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 6657     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 6658   %}
 6659   ins_pipe(pipe_cmov_reg);
 6660 %}
 6661 
 6662 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 6663 %{
 6664   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 6665 
 6666   ins_cost(200); // XXX
 6667   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 6668   ins_encode %{
 6669     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 6670   %}
 6671   ins_pipe(pipe_cmov_mem); // XXX
 6672 %}
 6673 
 6674 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 6675   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 6676   ins_cost(200);
 6677   expand %{
 6678     cmovL_memU(cop, cr, dst, src);
 6679   %}
 6680 %}
 6681 
 6682 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 6683 %{
 6684   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6685 
 6686   ins_cost(200); // XXX
 6687   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 6688             "movss     $dst, $src\n"
 6689     "skip:" %}
 6690   ins_encode %{
 6691     Label Lskip;
 6692     // Invert sense of branch from sense of CMOV
 6693     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6694     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6695     __ bind(Lskip);
 6696   %}
 6697   ins_pipe(pipe_slow);
 6698 %}
 6699 
 6700 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 6701 %{
 6702   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6703 
 6704   ins_cost(200); // XXX
 6705   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 6706             "movss     $dst, $src\n"
 6707     "skip:" %}
 6708   ins_encode %{
 6709     Label Lskip;
 6710     // Invert sense of branch from sense of CMOV
 6711     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6712     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6713     __ bind(Lskip);
 6714   %}
 6715   ins_pipe(pipe_slow);
 6716 %}
 6717 
 6718 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 6719   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6720   ins_cost(200);
 6721   expand %{
 6722     cmovF_regU(cop, cr, dst, src);
 6723   %}
 6724 %}
 6725 
 6726 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 6727 %{
 6728   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6729 
 6730   ins_cost(200); // XXX
 6731   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 6732             "movsd     $dst, $src\n"
 6733     "skip:" %}
 6734   ins_encode %{
 6735     Label Lskip;
 6736     // Invert sense of branch from sense of CMOV
 6737     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6738     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6739     __ bind(Lskip);
 6740   %}
 6741   ins_pipe(pipe_slow);
 6742 %}
 6743 
 6744 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 6745 %{
 6746   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6747 
 6748   ins_cost(200); // XXX
 6749   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 6750             "movsd     $dst, $src\n"
 6751     "skip:" %}
 6752   ins_encode %{
 6753     Label Lskip;
 6754     // Invert sense of branch from sense of CMOV
 6755     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6756     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6757     __ bind(Lskip);
 6758   %}
 6759   ins_pipe(pipe_slow);
 6760 %}
 6761 
 6762 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 6763   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6764   ins_cost(200);
 6765   expand %{
 6766     cmovD_regU(cop, cr, dst, src);
 6767   %}
 6768 %}
 6769 
 6770 //----------Arithmetic Instructions--------------------------------------------
 6771 //----------Addition Instructions----------------------------------------------
 6772 
 6773 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 6774 %{
 6775   match(Set dst (AddI dst src));
 6776   effect(KILL cr);
 6777   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 6778   format %{ "addl    $dst, $src\t# int" %}
 6779   ins_encode %{
 6780     __ addl($dst$$Register, $src$$Register);
 6781   %}
 6782   ins_pipe(ialu_reg_reg);
 6783 %}
 6784 
 6785 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 6786 %{
 6787   match(Set dst (AddI dst src));
 6788   effect(KILL cr);
 6789   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 6790 
 6791   format %{ "addl    $dst, $src\t# int" %}
 6792   ins_encode %{
 6793     __ addl($dst$$Register, $src$$constant);
 6794   %}
 6795   ins_pipe( ialu_reg );
 6796 %}
 6797 
 6798 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 6799 %{
 6800   match(Set dst (AddI dst (LoadI src)));
 6801   effect(KILL cr);
 6802   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 6803 
 6804   ins_cost(150); // XXX
 6805   format %{ "addl    $dst, $src\t# int" %}
 6806   ins_encode %{
 6807     __ addl($dst$$Register, $src$$Address);
 6808   %}
 6809   ins_pipe(ialu_reg_mem);
 6810 %}
 6811 
 6812 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 6813 %{
 6814   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 6815   effect(KILL cr);
 6816   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 6817 
 6818   ins_cost(150); // XXX
 6819   format %{ "addl    $dst, $src\t# int" %}
 6820   ins_encode %{
 6821     __ addl($dst$$Address, $src$$Register);
 6822   %}
 6823   ins_pipe(ialu_mem_reg);
 6824 %}
 6825 
 6826 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 6827 %{
 6828   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 6829   effect(KILL cr);
 6830   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 6831 
 6832 
 6833   ins_cost(125); // XXX
 6834   format %{ "addl    $dst, $src\t# int" %}
 6835   ins_encode %{
 6836     __ addl($dst$$Address, $src$$constant);
 6837   %}
 6838   ins_pipe(ialu_mem_imm);
 6839 %}
 6840 
 6841 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 6842 %{
 6843   predicate(UseIncDec);
 6844   match(Set dst (AddI dst src));
 6845   effect(KILL cr);
 6846 
 6847   format %{ "incl    $dst\t# int" %}
 6848   ins_encode %{
 6849     __ incrementl($dst$$Register);
 6850   %}
 6851   ins_pipe(ialu_reg);
 6852 %}
 6853 
 6854 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 6855 %{
 6856   predicate(UseIncDec);
 6857   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 6858   effect(KILL cr);
 6859 
 6860   ins_cost(125); // XXX
 6861   format %{ "incl    $dst\t# int" %}
 6862   ins_encode %{
 6863     __ incrementl($dst$$Address);
 6864   %}
 6865   ins_pipe(ialu_mem_imm);
 6866 %}
 6867 
 6868 // XXX why does that use AddI
 6869 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
 6870 %{
 6871   predicate(UseIncDec);
 6872   match(Set dst (AddI dst src));
 6873   effect(KILL cr);
 6874 
 6875   format %{ "decl    $dst\t# int" %}
 6876   ins_encode %{
 6877     __ decrementl($dst$$Register);
 6878   %}
 6879   ins_pipe(ialu_reg);
 6880 %}
 6881 
 6882 // XXX why does that use AddI
 6883 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
 6884 %{
 6885   predicate(UseIncDec);
 6886   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 6887   effect(KILL cr);
 6888 
 6889   ins_cost(125); // XXX
 6890   format %{ "decl    $dst\t# int" %}
 6891   ins_encode %{
 6892     __ decrementl($dst$$Address);
 6893   %}
 6894   ins_pipe(ialu_mem_imm);
 6895 %}
 6896 
 6897 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
 6898 %{
 6899   predicate(VM_Version::supports_fast_2op_lea());
 6900   match(Set dst (AddI (LShiftI index scale) disp));
 6901 
 6902   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
 6903   ins_encode %{
 6904     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 6905     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 6906   %}
 6907   ins_pipe(ialu_reg_reg);
 6908 %}
 6909 
 6910 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
 6911 %{
 6912   predicate(VM_Version::supports_fast_3op_lea());
 6913   match(Set dst (AddI (AddI base index) disp));
 6914 
 6915   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
 6916   ins_encode %{
 6917     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 6918   %}
 6919   ins_pipe(ialu_reg_reg);
 6920 %}
 6921 
 6922 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
 6923 %{
 6924   predicate(VM_Version::supports_fast_2op_lea());
 6925   match(Set dst (AddI base (LShiftI index scale)));
 6926 
 6927   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
 6928   ins_encode %{
 6929     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 6930     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
 6931   %}
 6932   ins_pipe(ialu_reg_reg);
 6933 %}
 6934 
 6935 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
 6936 %{
 6937   predicate(VM_Version::supports_fast_3op_lea());
 6938   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
 6939 
 6940   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
 6941   ins_encode %{
 6942     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 6943     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 6944   %}
 6945   ins_pipe(ialu_reg_reg);
 6946 %}
 6947 
 6948 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 6949 %{
 6950   match(Set dst (AddL dst src));
 6951   effect(KILL cr);
 6952   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 6953 
 6954   format %{ "addq    $dst, $src\t# long" %}
 6955   ins_encode %{
 6956     __ addq($dst$$Register, $src$$Register);
 6957   %}
 6958   ins_pipe(ialu_reg_reg);
 6959 %}
 6960 
 6961 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 6962 %{
 6963   match(Set dst (AddL dst src));
 6964   effect(KILL cr);
 6965   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 6966 
 6967   format %{ "addq    $dst, $src\t# long" %}
 6968   ins_encode %{
 6969     __ addq($dst$$Register, $src$$constant);
 6970   %}
 6971   ins_pipe( ialu_reg );
 6972 %}
 6973 
 6974 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 6975 %{
 6976   match(Set dst (AddL dst (LoadL src)));
 6977   effect(KILL cr);
 6978   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 6979 
 6980   ins_cost(150); // XXX
 6981   format %{ "addq    $dst, $src\t# long" %}
 6982   ins_encode %{
 6983     __ addq($dst$$Register, $src$$Address);
 6984   %}
 6985   ins_pipe(ialu_reg_mem);
 6986 %}
 6987 
 6988 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 6989 %{
 6990   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 6991   effect(KILL cr);
 6992   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 6993 
 6994   ins_cost(150); // XXX
 6995   format %{ "addq    $dst, $src\t# long" %}
 6996   ins_encode %{
 6997     __ addq($dst$$Address, $src$$Register);
 6998   %}
 6999   ins_pipe(ialu_mem_reg);
 7000 %}
 7001 
 7002 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
 7003 %{
 7004   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7005   effect(KILL cr);
 7006   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 7007 
 7008   ins_cost(125); // XXX
 7009   format %{ "addq    $dst, $src\t# long" %}
 7010   ins_encode %{
 7011     __ addq($dst$$Address, $src$$constant);
 7012   %}
 7013   ins_pipe(ialu_mem_imm);
 7014 %}
 7015 
 7016 instruct incL_rReg(rRegI dst, immL1 src, rFlagsReg cr)
 7017 %{
 7018   predicate(UseIncDec);
 7019   match(Set dst (AddL dst src));
 7020   effect(KILL cr);
 7021 
 7022   format %{ "incq    $dst\t# long" %}
 7023   ins_encode %{
 7024     __ incrementq($dst$$Register);
 7025   %}
 7026   ins_pipe(ialu_reg);
 7027 %}
 7028 
 7029 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
 7030 %{
 7031   predicate(UseIncDec);
 7032   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7033   effect(KILL cr);
 7034 
 7035   ins_cost(125); // XXX
 7036   format %{ "incq    $dst\t# long" %}
 7037   ins_encode %{
 7038     __ incrementq($dst$$Address);
 7039   %}
 7040   ins_pipe(ialu_mem_imm);
 7041 %}
 7042 
 7043 // XXX why does that use AddL
 7044 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
 7045 %{
 7046   predicate(UseIncDec);
 7047   match(Set dst (AddL dst src));
 7048   effect(KILL cr);
 7049 
 7050   format %{ "decq    $dst\t# long" %}
 7051   ins_encode %{
 7052     __ decrementq($dst$$Register);
 7053   %}
 7054   ins_pipe(ialu_reg);
 7055 %}
 7056 
 7057 // XXX why does that use AddL
 7058 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
 7059 %{
 7060   predicate(UseIncDec);
 7061   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7062   effect(KILL cr);
 7063 
 7064   ins_cost(125); // XXX
 7065   format %{ "decq    $dst\t# long" %}
 7066   ins_encode %{
 7067     __ decrementq($dst$$Address);
 7068   %}
 7069   ins_pipe(ialu_mem_imm);
 7070 %}
 7071 
 7072 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
 7073 %{
 7074   predicate(VM_Version::supports_fast_2op_lea());
 7075   match(Set dst (AddL (LShiftL index scale) disp));
 7076 
 7077   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
 7078   ins_encode %{
 7079     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7080     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7081   %}
 7082   ins_pipe(ialu_reg_reg);
 7083 %}
 7084 
 7085 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
 7086 %{
 7087   predicate(VM_Version::supports_fast_3op_lea());
 7088   match(Set dst (AddL (AddL base index) disp));
 7089 
 7090   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
 7091   ins_encode %{
 7092     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7093   %}
 7094   ins_pipe(ialu_reg_reg);
 7095 %}
 7096 
 7097 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
 7098 %{
 7099   predicate(VM_Version::supports_fast_2op_lea());
 7100   match(Set dst (AddL base (LShiftL index scale)));
 7101 
 7102   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
 7103   ins_encode %{
 7104     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7105     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
 7106   %}
 7107   ins_pipe(ialu_reg_reg);
 7108 %}
 7109 
 7110 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
 7111 %{
 7112   predicate(VM_Version::supports_fast_3op_lea());
 7113   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
 7114 
 7115   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
 7116   ins_encode %{
 7117     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7118     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 7119   %}
 7120   ins_pipe(ialu_reg_reg);
 7121 %}
 7122 
 7123 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
 7124 %{
 7125   match(Set dst (AddP dst src));
 7126   effect(KILL cr);
 7127   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 7128 
 7129   format %{ "addq    $dst, $src\t# ptr" %}
 7130   ins_encode %{
 7131     __ addq($dst$$Register, $src$$Register);
 7132   %}
 7133   ins_pipe(ialu_reg_reg);
 7134 %}
 7135 
 7136 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
 7137 %{
 7138   match(Set dst (AddP dst src));
 7139   effect(KILL cr);
 7140   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 7141 
 7142   format %{ "addq    $dst, $src\t# ptr" %}
 7143   ins_encode %{
 7144     __ addq($dst$$Register, $src$$constant);
 7145   %}
 7146   ins_pipe( ialu_reg );
 7147 %}
 7148 
 7149 // XXX addP mem ops ????
 7150 
 7151 instruct checkCastPP(rRegP dst)
 7152 %{
 7153   match(Set dst (CheckCastPP dst));
 7154 
 7155   size(0);
 7156   format %{ "# checkcastPP of $dst" %}
 7157   ins_encode(/* empty encoding */);
 7158   ins_pipe(empty);
 7159 %}
 7160 
 7161 instruct castPP(rRegP dst)
 7162 %{
 7163   match(Set dst (CastPP dst));
 7164 
 7165   size(0);
 7166   format %{ "# castPP of $dst" %}
 7167   ins_encode(/* empty encoding */);
 7168   ins_pipe(empty);
 7169 %}
 7170 
 7171 instruct castII(rRegI dst)
 7172 %{
 7173   match(Set dst (CastII dst));
 7174 
 7175   size(0);
 7176   format %{ "# castII of $dst" %}
 7177   ins_encode(/* empty encoding */);
 7178   ins_cost(0);
 7179   ins_pipe(empty);
 7180 %}
 7181 
 7182 instruct castLL(rRegL dst)
 7183 %{
 7184   match(Set dst (CastLL dst));
 7185 
 7186   size(0);
 7187   format %{ "# castLL of $dst" %}
 7188   ins_encode(/* empty encoding */);
 7189   ins_cost(0);
 7190   ins_pipe(empty);
 7191 %}
 7192 
 7193 instruct castFF(regF dst)
 7194 %{
 7195   match(Set dst (CastFF dst));
 7196 
 7197   size(0);
 7198   format %{ "# castFF of $dst" %}
 7199   ins_encode(/* empty encoding */);
 7200   ins_cost(0);
 7201   ins_pipe(empty);
 7202 %}
 7203 
 7204 instruct castDD(regD dst)
 7205 %{
 7206   match(Set dst (CastDD dst));
 7207 
 7208   size(0);
 7209   format %{ "# castDD of $dst" %}
 7210   ins_encode(/* empty encoding */);
 7211   ins_cost(0);
 7212   ins_pipe(empty);
 7213 %}
 7214 
 7215 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7216 instruct compareAndSwapP(rRegI res,
 7217                          memory mem_ptr,
 7218                          rax_RegP oldval, rRegP newval,
 7219                          rFlagsReg cr)
 7220 %{
 7221   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 7222   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7223   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7224   effect(KILL cr, KILL oldval);
 7225 
 7226   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 7227             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 7228             "sete    $res\n\t"
 7229             "movzbl  $res, $res" %}
 7230   ins_encode %{
 7231     __ lock();
 7232     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 7233     __ setb(Assembler::equal, $res$$Register);
 7234     __ movzbl($res$$Register, $res$$Register);
 7235   %}
 7236   ins_pipe( pipe_cmpxchg );
 7237 %}
 7238 
 7239 instruct compareAndSwapL(rRegI res,
 7240                          memory mem_ptr,
 7241                          rax_RegL oldval, rRegL newval,
 7242                          rFlagsReg cr)
 7243 %{
 7244   predicate(VM_Version::supports_cx8());
 7245   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7246   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7247   effect(KILL cr, KILL oldval);
 7248 
 7249   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 7250             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 7251             "sete    $res\n\t"
 7252             "movzbl  $res, $res" %}
 7253   ins_encode %{
 7254     __ lock();
 7255     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 7256     __ setb(Assembler::equal, $res$$Register);
 7257     __ movzbl($res$$Register, $res$$Register);
 7258   %}
 7259   ins_pipe( pipe_cmpxchg );
 7260 %}
 7261 
 7262 instruct compareAndSwapI(rRegI res,
 7263                          memory mem_ptr,
 7264                          rax_RegI oldval, rRegI newval,
 7265                          rFlagsReg cr)
 7266 %{
 7267   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7268   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7269   effect(KILL cr, KILL oldval);
 7270 
 7271   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 7272             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 7273             "sete    $res\n\t"
 7274             "movzbl  $res, $res" %}
 7275   ins_encode %{
 7276     __ lock();
 7277     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 7278     __ setb(Assembler::equal, $res$$Register);
 7279     __ movzbl($res$$Register, $res$$Register);
 7280   %}
 7281   ins_pipe( pipe_cmpxchg );
 7282 %}
 7283 
 7284 instruct compareAndSwapB(rRegI res,
 7285                          memory mem_ptr,
 7286                          rax_RegI oldval, rRegI newval,
 7287                          rFlagsReg cr)
 7288 %{
 7289   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7290   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7291   effect(KILL cr, KILL oldval);
 7292 
 7293   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 7294             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 7295             "sete    $res\n\t"
 7296             "movzbl  $res, $res" %}
 7297   ins_encode %{
 7298     __ lock();
 7299     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 7300     __ setb(Assembler::equal, $res$$Register);
 7301     __ movzbl($res$$Register, $res$$Register);
 7302   %}
 7303   ins_pipe( pipe_cmpxchg );
 7304 %}
 7305 
 7306 instruct compareAndSwapS(rRegI res,
 7307                          memory mem_ptr,
 7308                          rax_RegI oldval, rRegI newval,
 7309                          rFlagsReg cr)
 7310 %{
 7311   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7312   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7313   effect(KILL cr, KILL oldval);
 7314 
 7315   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 7316             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 7317             "sete    $res\n\t"
 7318             "movzbl  $res, $res" %}
 7319   ins_encode %{
 7320     __ lock();
 7321     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 7322     __ setb(Assembler::equal, $res$$Register);
 7323     __ movzbl($res$$Register, $res$$Register);
 7324   %}
 7325   ins_pipe( pipe_cmpxchg );
 7326 %}
 7327 
 7328 instruct compareAndSwapN(rRegI res,
 7329                           memory mem_ptr,
 7330                           rax_RegN oldval, rRegN newval,
 7331                           rFlagsReg cr) %{
 7332   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
 7333   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
 7334   effect(KILL cr, KILL oldval);
 7335 
 7336   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 7337             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 7338             "sete    $res\n\t"
 7339             "movzbl  $res, $res" %}
 7340   ins_encode %{
 7341     __ lock();
 7342     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 7343     __ setb(Assembler::equal, $res$$Register);
 7344     __ movzbl($res$$Register, $res$$Register);
 7345   %}
 7346   ins_pipe( pipe_cmpxchg );
 7347 %}
 7348 
 7349 instruct compareAndExchangeB(
 7350                          memory mem_ptr,
 7351                          rax_RegI oldval, rRegI newval,
 7352                          rFlagsReg cr)
 7353 %{
 7354   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7355   effect(KILL cr);
 7356 
 7357   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 7358             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 7359   ins_encode %{
 7360     __ lock();
 7361     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 7362   %}
 7363   ins_pipe( pipe_cmpxchg );
 7364 %}
 7365 
 7366 instruct compareAndExchangeS(
 7367                          memory mem_ptr,
 7368                          rax_RegI oldval, rRegI newval,
 7369                          rFlagsReg cr)
 7370 %{
 7371   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7372   effect(KILL cr);
 7373 
 7374   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 7375             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 7376   ins_encode %{
 7377     __ lock();
 7378     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 7379   %}
 7380   ins_pipe( pipe_cmpxchg );
 7381 %}
 7382 
 7383 instruct compareAndExchangeI(
 7384                          memory mem_ptr,
 7385                          rax_RegI oldval, rRegI newval,
 7386                          rFlagsReg cr)
 7387 %{
 7388   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7389   effect(KILL cr);
 7390 
 7391   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 7392             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 7393   ins_encode %{
 7394     __ lock();
 7395     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 7396   %}
 7397   ins_pipe( pipe_cmpxchg );
 7398 %}
 7399 
 7400 instruct compareAndExchangeL(
 7401                          memory mem_ptr,
 7402                          rax_RegL oldval, rRegL newval,
 7403                          rFlagsReg cr)
 7404 %{
 7405   predicate(VM_Version::supports_cx8());
 7406   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7407   effect(KILL cr);
 7408 
 7409   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 7410             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 7411   ins_encode %{
 7412     __ lock();
 7413     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 7414   %}
 7415   ins_pipe( pipe_cmpxchg );
 7416 %}
 7417 
 7418 instruct compareAndExchangeN(
 7419                           memory mem_ptr,
 7420                           rax_RegN oldval, rRegN newval,
 7421                           rFlagsReg cr) %{
 7422   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
 7423   effect(KILL cr);
 7424 
 7425   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 7426             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 7427   ins_encode %{
 7428     __ lock();
 7429     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 7430   %}
 7431   ins_pipe( pipe_cmpxchg );
 7432 %}
 7433 
 7434 instruct compareAndExchangeP(
 7435                          memory mem_ptr,
 7436                          rax_RegP oldval, rRegP newval,
 7437                          rFlagsReg cr)
 7438 %{
 7439   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 7440   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7441   effect(KILL cr);
 7442 
 7443   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 7444             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 7445   ins_encode %{
 7446     __ lock();
 7447     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 7448   %}
 7449   ins_pipe( pipe_cmpxchg );
 7450 %}
 7451 
 7452 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
 7453   predicate(n->as_LoadStore()->result_not_used());
 7454   match(Set dummy (GetAndAddB mem add));
 7455   effect(KILL cr);
 7456   format %{ "addb_lock   $mem, $add" %}
 7457   ins_encode %{
 7458     __ lock();
 7459     __ addb($mem$$Address, $add$$Register);
 7460   %}
 7461   ins_pipe(pipe_cmpxchg);
 7462 %}
 7463 
 7464 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 7465   predicate(n->as_LoadStore()->result_not_used());
 7466   match(Set dummy (GetAndAddB mem add));
 7467   effect(KILL cr);
 7468   format %{ "addb_lock   $mem, $add" %}
 7469   ins_encode %{
 7470     __ lock();
 7471     __ addb($mem$$Address, $add$$constant);
 7472   %}
 7473   ins_pipe(pipe_cmpxchg);
 7474 %}
 7475 
 7476 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
 7477   predicate(!n->as_LoadStore()->result_not_used());
 7478   match(Set newval (GetAndAddB mem newval));
 7479   effect(KILL cr);
 7480   format %{ "xaddb_lock  $mem, $newval" %}
 7481   ins_encode %{
 7482     __ lock();
 7483     __ xaddb($mem$$Address, $newval$$Register);
 7484   %}
 7485   ins_pipe(pipe_cmpxchg);
 7486 %}
 7487 
 7488 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
 7489   predicate(n->as_LoadStore()->result_not_used());
 7490   match(Set dummy (GetAndAddS mem add));
 7491   effect(KILL cr);
 7492   format %{ "addw_lock   $mem, $add" %}
 7493   ins_encode %{
 7494     __ lock();
 7495     __ addw($mem$$Address, $add$$Register);
 7496   %}
 7497   ins_pipe(pipe_cmpxchg);
 7498 %}
 7499 
 7500 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 7501   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
 7502   match(Set dummy (GetAndAddS mem add));
 7503   effect(KILL cr);
 7504   format %{ "addw_lock   $mem, $add" %}
 7505   ins_encode %{
 7506     __ lock();
 7507     __ addw($mem$$Address, $add$$constant);
 7508   %}
 7509   ins_pipe(pipe_cmpxchg);
 7510 %}
 7511 
 7512 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
 7513   predicate(!n->as_LoadStore()->result_not_used());
 7514   match(Set newval (GetAndAddS mem newval));
 7515   effect(KILL cr);
 7516   format %{ "xaddw_lock  $mem, $newval" %}
 7517   ins_encode %{
 7518     __ lock();
 7519     __ xaddw($mem$$Address, $newval$$Register);
 7520   %}
 7521   ins_pipe(pipe_cmpxchg);
 7522 %}
 7523 
 7524 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
 7525   predicate(n->as_LoadStore()->result_not_used());
 7526   match(Set dummy (GetAndAddI mem add));
 7527   effect(KILL cr);
 7528   format %{ "addl_lock   $mem, $add" %}
 7529   ins_encode %{
 7530     __ lock();
 7531     __ addl($mem$$Address, $add$$Register);
 7532   %}
 7533   ins_pipe(pipe_cmpxchg);
 7534 %}
 7535 
 7536 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 7537   predicate(n->as_LoadStore()->result_not_used());
 7538   match(Set dummy (GetAndAddI mem add));
 7539   effect(KILL cr);
 7540   format %{ "addl_lock   $mem, $add" %}
 7541   ins_encode %{
 7542     __ lock();
 7543     __ addl($mem$$Address, $add$$constant);
 7544   %}
 7545   ins_pipe(pipe_cmpxchg);
 7546 %}
 7547 
 7548 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
 7549   predicate(!n->as_LoadStore()->result_not_used());
 7550   match(Set newval (GetAndAddI mem newval));
 7551   effect(KILL cr);
 7552   format %{ "xaddl_lock  $mem, $newval" %}
 7553   ins_encode %{
 7554     __ lock();
 7555     __ xaddl($mem$$Address, $newval$$Register);
 7556   %}
 7557   ins_pipe(pipe_cmpxchg);
 7558 %}
 7559 
      // GetAndAddL with a register addend when the result is not used
      // (predicate: result_not_used()). Emits lock() + addq(mem, reg); the
      // Set target is a Universe-class dummy operand. Flags are killed.
 7560 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
 7561   predicate(n->as_LoadStore()->result_not_used());
 7562   match(Set dummy (GetAndAddL mem add));
 7563   effect(KILL cr);
 7564   format %{ "addq_lock   $mem, $add" %}
 7565   ins_encode %{
 7566     __ lock();
 7567     __ addq($mem$$Address, $add$$Register);
 7568   %}
 7569   ins_pipe(pipe_cmpxchg);
 7570 %}
 7571 
      // GetAndAddL with an immL32 constant addend when the result is not used
      // (predicate: result_not_used()). Emits lock() + addq(mem, constant);
      // the Set target is a Universe-class dummy operand. Flags are killed.
 7572 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
 7573   predicate(n->as_LoadStore()->result_not_used());
 7574   match(Set dummy (GetAndAddL mem add));
 7575   effect(KILL cr);
 7576   format %{ "addq_lock   $mem, $add" %}
 7577   ins_encode %{
 7578     __ lock();
 7579     __ addq($mem$$Address, $add$$constant);
 7580   %}
 7581   ins_pipe(pipe_cmpxchg);
 7582 %}
 7583 
      // Fetch-and-add for GetAndAddL when the old value is consumed
      // (predicate: !result_not_used()). Emits lock() followed by
      // xaddq(mem, newval); $newval is both the addend input and the Set
      // destination. The flags register is declared killed.
 7584 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
 7585   predicate(!n->as_LoadStore()->result_not_used());
 7586   match(Set newval (GetAndAddL mem newval));
 7587   effect(KILL cr);
 7588   format %{ "xaddq_lock  $mem, $newval" %}
 7589   ins_encode %{
 7590     __ lock();
 7591     __ xaddq($mem$$Address, $newval$$Register);
 7592   %}
 7593   ins_pipe(pipe_cmpxchg);
 7594 %}
 7595 
      // Exchange for GetAndSetB: emits xchgb(newval, mem). $newval carries the
      // new value in and is the Set destination. No lock() call is emitted and
      // no flags effect is declared.
      // NOTE(review): presumably relies on XCHG with a memory operand being
      // implicitly locked on x86 - confirm against the ISA manual.
 7596 instruct xchgB( memory mem, rRegI newval) %{
 7597   match(Set newval (GetAndSetB mem newval));
 7598   format %{ "XCHGB  $newval,[$mem]" %}
 7599   ins_encode %{
 7600     __ xchgb($newval$$Register, $mem$$Address);
 7601   %}
 7602   ins_pipe( pipe_cmpxchg );
 7603 %}
 7604 
      // Exchange for GetAndSetS: emits xchgw(newval, mem). $newval carries the
      // new value in and is the Set destination. No lock() call is emitted and
      // no flags effect is declared.
      // NOTE(review): presumably relies on XCHG with a memory operand being
      // implicitly locked on x86 - confirm against the ISA manual.
 7605 instruct xchgS( memory mem, rRegI newval) %{
 7606   match(Set newval (GetAndSetS mem newval));
 7607   format %{ "XCHGW  $newval,[$mem]" %}
 7608   ins_encode %{
 7609     __ xchgw($newval$$Register, $mem$$Address);
 7610   %}
 7611   ins_pipe( pipe_cmpxchg );
 7612 %}
 7613 
      // Exchange for GetAndSetI: emits xchgl(newval, mem). $newval carries the
      // new value in and is the Set destination. No lock() call is emitted and
      // no flags effect is declared.
      // NOTE(review): presumably relies on XCHG with a memory operand being
      // implicitly locked on x86 - confirm against the ISA manual.
 7614 instruct xchgI( memory mem, rRegI newval) %{
 7615   match(Set newval (GetAndSetI mem newval));
 7616   format %{ "XCHGL  $newval,[$mem]" %}
 7617   ins_encode %{
 7618     __ xchgl($newval$$Register, $mem$$Address);
 7619   %}
 7620   ins_pipe( pipe_cmpxchg );
 7621 %}
 7622 
 7623 instruct xchgL( memory mem, rRegL newval) %{
 7624   match(Set newval (GetAndSetL mem newval));
 7625   format %{ "XCHGL  $newval,[$mem]" %}
 7626   ins_encode %{
 7627     __ xchgq($newval$$Register, $mem$$Address);
 7628   %}
 7629   ins_pipe( pipe_cmpxchg );
 7630 %}
 7631 
      // Exchange for GetAndSetP: emits xchgq(newval, mem). Only selected when
      // the LoadStore node carries no barrier data (barrier_data() == 0).
      // $newval carries the new pointer in and is the Set destination.
      // No lock() call is emitted and no flags effect is declared.
 7632 instruct xchgP( memory mem, rRegP newval) %{
 7633   match(Set newval (GetAndSetP mem newval));
 7634   predicate(n->as_LoadStore()->barrier_data() == 0);
 7635   format %{ "XCHGQ  $newval,[$mem]" %}
 7636   ins_encode %{
 7637     __ xchgq($newval$$Register, $mem$$Address);
 7638   %}
 7639   ins_pipe( pipe_cmpxchg );
 7640 %}
 7641 
 7642 instruct xchgN( memory mem, rRegN newval) %{
 7643   match(Set newval (GetAndSetN mem newval));
 7644   format %{ "XCHGL  $newval,$mem]" %}
 7645   ins_encode %{
 7646     __ xchgl($newval$$Register, $mem$$Address);
 7647   %}
 7648   ins_pipe( pipe_cmpxchg );
 7649 %}
 7650 
 7651 //----------Abs Instructions-------------------------------------------
 7652 
 7653 // Integer Absolute Instructions
      // AbsI: computes dst = 0 - src (xorl + subl), then cmovl(less) copies src
      // into dst when that subtraction compared "less", i.e. when src was the
      // larger (positive) value. Flags are declared killed.
 7654 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 7655 %{
 7656   match(Set dst (AbsI src));
      // TEMP dst: dst is cleared before src is read, so the two must not share
      // a register.
 7657   effect(TEMP dst, KILL cr);
 7658   format %{ "xorl    $dst, $dst\t# abs int\n\t"
 7659             "subl    $dst, $src\n\t"
 7660             "cmovll  $dst, $src" %}
 7661   ins_encode %{
 7662     __ xorl($dst$$Register, $dst$$Register);
 7663     __ subl($dst$$Register, $src$$Register);
 7664     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
 7665   %}
 7666 
 7667   ins_pipe(ialu_reg_reg);
 7668 %}
 7669 
 7670 // Long Absolute Instructions
      // AbsL: computes dst = 0 - src (xorl + subq), then cmovq(less) copies src
      // into dst when that subtraction compared "less", i.e. when src was the
      // larger (positive) value. Flags are declared killed.
 7671 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 7672 %{
 7673   match(Set dst (AbsL src));
      // TEMP dst: dst is cleared before src is read, so the two must not share
      // a register.
 7674   effect(TEMP dst, KILL cr);
 7675   format %{ "xorl    $dst, $dst\t# abs long\n\t"
 7676             "subq    $dst, $src\n\t"
 7677             "cmovlq  $dst, $src" %}
 7678   ins_encode %{
      // NOTE(review): the 32-bit xorl is used to zero the 64-bit dst;
      // presumably relies on 32-bit register writes zero-extending - confirm.
 7679     __ xorl($dst$$Register, $dst$$Register);
 7680     __ subq($dst$$Register, $src$$Register);
 7681     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
 7682   %}
 7683 
 7684   ins_pipe(ialu_reg_reg);
 7685 %}
 7686 
 7687 //----------Subtraction Instructions-------------------------------------------
 7688 
 7689 // Integer Subtraction Instructions
      // SubI register - register: dst = dst - src via subl(dst, src).
      // Flags are declared killed; the flag(...) clause tags the rule with the
      // overflow, sign, zero, carry and parity PD::Flag_sets_* attributes.
 7690 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 7691 %{
 7692   match(Set dst (SubI dst src));
 7693   effect(KILL cr);
 7694   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 7695 
 7696   format %{ "subl    $dst, $src\t# int" %}
 7697   ins_encode %{
 7698     __ subl($dst$$Register, $src$$Register);
 7699   %}
 7700   ins_pipe(ialu_reg_reg);
 7701 %}
 7702 
      // SubI register - memory: folds the LoadI of src into the subtract,
      // emitting subl(dst, [src]). Cost 150. Flags are declared killed and the
      // rule is tagged with the overflow/sign/zero/carry/parity flag attributes.
 7703 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 7704 %{
 7705   match(Set dst (SubI dst (LoadI src)));
 7706   effect(KILL cr);
 7707   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 7708 
 7709   ins_cost(150);
 7710   format %{ "subl    $dst, $src\t# int" %}
 7711   ins_encode %{
 7712     __ subl($dst$$Register, $src$$Address);
 7713   %}
 7714   ins_pipe(ialu_reg_mem);
 7715 %}
 7716 
      // SubI memory read-modify-write: matches a StoreI to dst of
      // (LoadI dst) - src and emits a single subl([dst], src). Cost 150.
      // Flags are declared killed and the rule is tagged with the
      // overflow/sign/zero/carry/parity flag attributes.
 7717 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 7718 %{
 7719   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7720   effect(KILL cr);
 7721   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 7722 
 7723   ins_cost(150);
 7724   format %{ "subl    $dst, $src\t# int" %}
 7725   ins_encode %{
 7726     __ subl($dst$$Address, $src$$Register);
 7727   %}
 7728   ins_pipe(ialu_mem_reg);
 7729 %}
 7730 
      // SubL register - register: dst = dst - src via subq(dst, src).
      // Flags are declared killed; the flag(...) clause tags the rule with the
      // overflow, sign, zero, carry and parity PD::Flag_sets_* attributes.
 7731 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 7732 %{
 7733   match(Set dst (SubL dst src));
 7734   effect(KILL cr);
 7735   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 7736 
 7737   format %{ "subq    $dst, $src\t# long" %}
 7738   ins_encode %{
 7739     __ subq($dst$$Register, $src$$Register);
 7740   %}
 7741   ins_pipe(ialu_reg_reg);
 7742 %}
 7743 
      // SubL register - memory: folds the LoadL of src into the subtract,
      // emitting subq(dst, [src]). Cost 150. Flags are declared killed and the
      // rule is tagged with the overflow/sign/zero/carry/parity flag attributes.
 7744 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 7745 %{
 7746   match(Set dst (SubL dst (LoadL src)));
 7747   effect(KILL cr);
 7748   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 7749 
 7750   ins_cost(150);
 7751   format %{ "subq    $dst, $src\t# long" %}
 7752   ins_encode %{
 7753     __ subq($dst$$Register, $src$$Address);
 7754   %}
 7755   ins_pipe(ialu_reg_mem);
 7756 %}
 7757 
      // SubL memory read-modify-write: matches a StoreL to dst of
      // (LoadL dst) - src and emits a single subq([dst], src). Cost 150.
      // Flags are declared killed and the rule is tagged with the
      // overflow/sign/zero/carry/parity flag attributes.
 7758 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 7759 %{
 7760   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
 7761   effect(KILL cr);
 7762   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 7763 
 7764   ins_cost(150);
 7765   format %{ "subq    $dst, $src\t# long" %}
 7766   ins_encode %{
 7767     __ subq($dst$$Address, $src$$Register);
 7768   %}
 7769   ins_pipe(ialu_mem_reg);
 7770 %}
 7771 
 7772 // Subtract from a pointer
 7773 // XXX hmpf???
      // Matches AddP of dst with (0 - src), i.e. a pointer plus a negated int,
      // and emits subq(dst, src). Flags are declared killed; unlike the SubI /
      // SubL rules no flag(...) attributes are attached.
      // NOTE(review): src is an rRegI but is consumed by a 64-bit subq - verify
      // the upper 32 bits of src are well defined wherever this rule can match.
 7774 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
 7775 %{
 7776   match(Set dst (AddP dst (SubI zero src)));
 7777   effect(KILL cr);
 7778 
 7779   format %{ "subq    $dst, $src\t# ptr - int" %}
 7780   ins_encode %{
 7781     __ subq($dst$$Register, $src$$Register);
 7782   %}
 7783   ins_pipe(ialu_reg_reg);
 7784 %}
 7785 
      // Int negate expressed as (0 - dst): matches SubI zero dst and emits
      // negl(dst) in place. Flags are declared killed; the rule is tagged with
      // the overflow, sign, zero and parity flag attributes (no carry
      // attribute is listed).
 7786 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
 7787 %{
 7788   match(Set dst (SubI zero dst));
 7789   effect(KILL cr);
 7790   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
 7791 
 7792   format %{ "negl    $dst\t# int" %}
 7793   ins_encode %{
 7794     __ negl($dst$$Register);
 7795   %}
 7796   ins_pipe(ialu_reg);
 7797 %}
 7798 
      // Int negate for an explicit NegI node: emits negl(dst) in place.
      // Flags are declared killed; the rule is tagged with the overflow, sign,
      // zero and parity flag attributes (no carry attribute is listed).
 7799 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
 7800 %{
 7801   match(Set dst (NegI dst));
 7802   effect(KILL cr);
 7803   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
 7804 
 7805   format %{ "negl    $dst\t# int" %}
 7806   ins_encode %{
 7807     __ negl($dst$$Register);
 7808   %}
 7809   ins_pipe(ialu_reg);
 7810 %}
 7811 
      // Int negate in memory: matches a StoreI to dst of (0 - LoadI dst) and
      // emits a single negl([dst]). Flags are declared killed; the rule is
      // tagged with the overflow, sign, zero and parity flag attributes.
 7812 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
 7813 %{
 7814   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
 7815   effect(KILL cr);
 7816   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
 7817 
 7818   format %{ "negl    $dst\t# int" %}
 7819   ins_encode %{
 7820     __ negl($dst$$Address);
 7821   %}
      // NOTE(review): a register pipe class is used for a memory operand, and
      // no ins_cost is given - confirm this is intended.
 7822   ins_pipe(ialu_reg);
 7823 %}
 7824 
      // Long negate expressed as (0 - dst): matches SubL zero dst and emits
      // negq(dst) in place. Flags are declared killed; the rule is tagged with
      // the overflow, sign, zero and parity flag attributes (no carry
      // attribute is listed).
 7825 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
 7826 %{
 7827   match(Set dst (SubL zero dst));
 7828   effect(KILL cr);
 7829   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
 7830 
 7831   format %{ "negq    $dst\t# long" %}
 7832   ins_encode %{
 7833     __ negq($dst$$Register);
 7834   %}
 7835   ins_pipe(ialu_reg);
 7836 %}
 7837 
 7838 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
 7839 %{
 7840   match(Set dst (NegL dst));
 7841   effect(KILL cr);
 7842   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
 7843 
 7844   format %{ "negq    $dst\t# int" %}
 7845   ins_encode %{
 7846     __ negq($dst$$Register);
 7847   %}
 7848   ins_pipe(ialu_reg);
 7849 %}
 7850 
      // Long negate in memory: matches a StoreL to dst of (0 - LoadL dst) and
      // emits a single negq([dst]). Flags are declared killed; the rule is
      // tagged with the overflow, sign, zero and parity flag attributes.
 7851 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
 7852 %{
 7853   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
 7854   effect(KILL cr);
 7855   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
 7856 
 7857   format %{ "negq    $dst\t# long" %}
 7858   ins_encode %{
 7859     __ negq($dst$$Address);
 7860   %}
      // NOTE(review): a register pipe class is used for a memory operand, and
      // no ins_cost is given - confirm this is intended.
 7861   ins_pipe(ialu_reg);
 7862 %}
 7863 
 7864 //----------Multiplication/Division Instructions-------------------------------
 7865 // Integer Multiplication Instructions
 7866 // Multiply Register
 7867 
      // MulI register * register: dst = dst * src via the two-operand
      // imull(dst, src). Cost 300; flags are declared killed.
 7868 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 7869 %{
 7870   match(Set dst (MulI dst src));
 7871   effect(KILL cr);
 7872 
 7873   ins_cost(300);
 7874   format %{ "imull   $dst, $src\t# int" %}
 7875   ins_encode %{
 7876     __ imull($dst$$Register, $src$$Register);
 7877   %}
 7878   ins_pipe(ialu_reg_reg_alu0);
 7879 %}
 7880 
      // MulI register * immI constant: dst = src * imm via the three-operand
      // imull(dst, src, constant); dst is not tied to src in the match rule.
      // Cost 300; flags are declared killed.
 7881 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
 7882 %{
 7883   match(Set dst (MulI src imm));
 7884   effect(KILL cr);
 7885 
 7886   ins_cost(300);
 7887   format %{ "imull   $dst, $src, $imm\t# int" %}
 7888   ins_encode %{
 7889     __ imull($dst$$Register, $src$$Register, $imm$$constant);
 7890   %}
 7891   ins_pipe(ialu_reg_reg_alu0);
 7892 %}
 7893 
      // MulI register * memory: folds the LoadI of src into the multiply,
      // emitting imull(dst, [src]). Cost 350; flags are declared killed.
 7894 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
 7895 %{
 7896   match(Set dst (MulI dst (LoadI src)));
 7897   effect(KILL cr);
 7898 
 7899   ins_cost(350);
 7900   format %{ "imull   $dst, $src\t# int" %}
 7901   ins_encode %{
 7902     __ imull($dst$$Register, $src$$Address);
 7903   %}
 7904   ins_pipe(ialu_reg_mem_alu0);
 7905 %}
 7906 
      // MulI memory * immI constant: dst = [src] * imm via the three-operand
      // imull(dst, [src], constant). Flags are declared killed.
 7907 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
 7908 %{
 7909   match(Set dst (MulI (LoadI src) imm));
 7910   effect(KILL cr);
 7911 
      // NOTE(review): cost is 300 here although the memory form mulI_mem uses
      // 350 - confirm the lower cost for a memory operand is intended.
 7912   ins_cost(300);
 7913   format %{ "imull   $dst, $src, $imm\t# int" %}
 7914   ins_encode %{
 7915     __ imull($dst$$Register, $src$$Address, $imm$$constant);
 7916   %}
 7917   ins_pipe(ialu_reg_mem_alu0);
 7918 %}
 7919 
      // MulAddS2I: computes dst = dst * src1 + src2 * src3 by expanding into
      // two mulI_rReg rules and one addI_rReg (addI_rReg is defined outside
      // this section). src2 holds the second product, hence KILL src2.
 7920 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
 7921 %{
 7922   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7923   effect(KILL cr, KILL src2);
 7924 
 7925   expand %{ mulI_rReg(dst, src1, cr);
 7926            mulI_rReg(src2, src3, cr);
 7927            addI_rReg(dst, src2, cr); %}
 7928 %}
 7929 
      // MulL register * register: dst = dst * src via the two-operand
      // imulq(dst, src). Cost 300; flags are declared killed.
 7930 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 7931 %{
 7932   match(Set dst (MulL dst src));
 7933   effect(KILL cr);
 7934 
 7935   ins_cost(300);
 7936   format %{ "imulq   $dst, $src\t# long" %}
 7937   ins_encode %{
 7938     __ imulq($dst$$Register, $src$$Register);
 7939   %}
 7940   ins_pipe(ialu_reg_reg_alu0);
 7941 %}
 7942 
      // MulL register * immL32 constant: dst = src * imm via the three-operand
      // imulq(dst, src, constant); dst is not tied to src in the match rule.
      // Cost 300; flags are declared killed.
 7943 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
 7944 %{
 7945   match(Set dst (MulL src imm));
 7946   effect(KILL cr);
 7947 
 7948   ins_cost(300);
 7949   format %{ "imulq   $dst, $src, $imm\t# long" %}
 7950   ins_encode %{
 7951     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
 7952   %}
 7953   ins_pipe(ialu_reg_reg_alu0);
 7954 %}
 7955 
      // MulL register * memory: folds the LoadL of src into the multiply,
      // emitting imulq(dst, [src]). Cost 350; flags are declared killed.
 7956 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
 7957 %{
 7958   match(Set dst (MulL dst (LoadL src)));
 7959   effect(KILL cr);
 7960 
 7961   ins_cost(350);
 7962   format %{ "imulq   $dst, $src\t# long" %}
 7963   ins_encode %{
 7964     __ imulq($dst$$Register, $src$$Address);
 7965   %}
 7966   ins_pipe(ialu_reg_mem_alu0);
 7967 %}
 7968 
      // MulL memory * immL32 constant: dst = [src] * imm via the three-operand
      // imulq(dst, [src], constant). Flags are declared killed.
 7969 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
 7970 %{
 7971   match(Set dst (MulL (LoadL src) imm));
 7972   effect(KILL cr);
 7973 
      // NOTE(review): cost is 300 here although the memory form mulL_mem uses
      // 350 - confirm the lower cost for a memory operand is intended.
 7974   ins_cost(300);
 7975   format %{ "imulq   $dst, $src, $imm\t# long" %}
 7976   ins_encode %{
 7977     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
 7978   %}
 7979   ins_pipe(ialu_reg_mem_alu0);
 7980 %}
 7981 
      // MulHiL (high half of a signed long multiply): one input is pinned to
      // the rax_RegL operand and the result to rdx_RegL. Emits the one-operand
      // imulq(src), which the format describes as writing RDX:RAX.
      // rax is both read and clobbered (USE_KILL); flags are declared killed.
 7982 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 7983 %{
 7984   match(Set dst (MulHiL src rax));
 7985   effect(USE_KILL rax, KILL cr);
 7986 
 7987   ins_cost(300);
 7988   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
 7989   ins_encode %{
 7990     __ imulq($src$$Register);
 7991   %}
 7992   ins_pipe(ialu_reg_reg_alu0);
 7993 %}
 7994 
      // UMulHiL (high half of an unsigned long multiply): one input is pinned
      // to the rax_RegL operand and the result to rdx_RegL. Emits the
      // one-operand mulq(src), which the format describes as writing RDX:RAX.
      // rax is both read and clobbered (USE_KILL); flags are declared killed.
 7995 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 7996 %{
 7997   match(Set dst (UMulHiL src rax));
 7998   effect(USE_KILL rax, KILL cr);
 7999 
 8000   ins_cost(300);
 8001   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
 8002   ins_encode %{
 8003     __ mulq($src$$Register);
 8004   %}
 8005   ins_pipe(ialu_reg_reg_alu0);
 8006 %}
 8007 
      // Signed int divide: dividend and quotient are pinned to rax, the divisor
      // is any int register other than rax/rdx, and rdx and the flags are
      // declared killed. Emission is delegated to the cdql_enc encoding class
      // (defined outside this section); the format string describes a sequence
      // that special-cases rax == 0x80000000 with divisor -1 before the
      // cdql/idivl pair.
 8008 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
 8009                    rFlagsReg cr)
 8010 %{
 8011   match(Set rax (DivI rax div));
 8012   effect(KILL rdx, KILL cr);
 8013 
 8014   ins_cost(30*100+10*100); // XXX
 8015   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
 8016             "jne,s   normal\n\t"
 8017             "xorl    rdx, rdx\n\t"
 8018             "cmpl    $div, -1\n\t"
 8019             "je,s    done\n"
 8020     "normal: cdql\n\t"
 8021             "idivl   $div\n"
 8022     "done:"        %}
 8023   ins_encode(cdql_enc(div));
 8024   ins_pipe(ialu_reg_reg_alu0);
 8025 %}
 8026 
      // Signed long divide: dividend and quotient are pinned to rax, the
      // divisor is any long register other than rax/rdx, and rdx and the flags
      // are declared killed. Emission is delegated to the cdqq_enc encoding
      // class (defined outside this section); the format string describes a
      // sequence that special-cases rax == 0x8000000000000000 with divisor -1
      // before the cdqq/idivq pair.
 8027 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
 8028                    rFlagsReg cr)
 8029 %{
 8030   match(Set rax (DivL rax div));
 8031   effect(KILL rdx, KILL cr);
 8032 
 8033   ins_cost(30*100+10*100); // XXX
 8034   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
 8035             "cmpq    rax, rdx\n\t"
 8036             "jne,s   normal\n\t"
 8037             "xorl    rdx, rdx\n\t"
 8038             "cmpq    $div, -1\n\t"
 8039             "je,s    done\n"
 8040     "normal: cdqq\n\t"
 8041             "idivq   $div\n"
 8042     "done:"        %}
 8043   ins_encode(cdqq_enc(div));
 8044   ins_pipe(ialu_reg_reg_alu0);
 8045 %}
 8046 
      // Unsigned int divide (UDivI): dividend and quotient are pinned to rax;
      // rdx and the flags are declared killed. Emission is delegated to the
      // macro-assembler helper udivI(rax, div, rdx), defined elsewhere.
 8047 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
 8048 %{
 8049   match(Set rax (UDivI rax div));
 8050   effect(KILL rdx, KILL cr);
 8051 
 8052   ins_cost(300);
 8053   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
 8054   ins_encode %{
 8055     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
 8056   %}
 8057   ins_pipe(ialu_reg_reg_alu0);
 8058 %}
 8059 
      // Unsigned long divide (UDivL): dividend and quotient are pinned to rax;
      // rdx and the flags are declared killed. Emission is delegated to the
      // macro-assembler helper udivL(rax, div, rdx), defined elsewhere.
 8060 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
 8061 %{
 8062   match(Set rax (UDivL rax div));
 8063   effect(KILL rdx, KILL cr);
 8064 
 8065   ins_cost(300);
 8066   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
 8067   ins_encode %{
 8068      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
 8069   %}
 8070   ins_pipe(ialu_reg_reg_alu0);
 8071 %}
 8072 
 8073 // Integer DIVMOD with Register, both quotient and mod results
      // Matches DivModI directly (no Set): rax and rdx are both operands and
      // only the flags are declared killed. Uses the same cdql_enc encoding
      // class and format text as the plain int divide.
 8074 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
 8075                              rFlagsReg cr)
 8076 %{
 8077   match(DivModI rax div);
 8078   effect(KILL cr);
 8079 
 8080   ins_cost(30*100+10*100); // XXX
 8081   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
 8082             "jne,s   normal\n\t"
 8083             "xorl    rdx, rdx\n\t"
 8084             "cmpl    $div, -1\n\t"
 8085             "je,s    done\n"
 8086     "normal: cdql\n\t"
 8087             "idivl   $div\n"
 8088     "done:"        %}
 8089   ins_encode(cdql_enc(div));
 8090   ins_pipe(pipe_slow);
 8091 %}
 8092 
 8093 // Long DIVMOD with Register, both quotient and mod results
      // Matches DivModL directly (no Set): rax and rdx are both operands and
      // only the flags are declared killed. Uses the same cdqq_enc encoding
      // class and format text as the plain long divide.
 8094 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
 8095                              rFlagsReg cr)
 8096 %{
 8097   match(DivModL rax div);
 8098   effect(KILL cr);
 8099 
 8100   ins_cost(30*100+10*100); // XXX
 8101   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
 8102             "cmpq    rax, rdx\n\t"
 8103             "jne,s   normal\n\t"
 8104             "xorl    rdx, rdx\n\t"
 8105             "cmpq    $div, -1\n\t"
 8106             "je,s    done\n"
 8107     "normal: cdqq\n\t"
 8108             "idivq   $div\n"
 8109     "done:"        %}
 8110   ins_encode(cdqq_enc(div));
 8111   ins_pipe(pipe_slow);
 8112 %}
 8113 
 8114 // Unsigned integer DIVMOD with Register, both quotient and mod results
      // Matches UDivModI directly (no Set): rax and rdx are both operands, an
      // extra non-rax/rdx register is reserved as TEMP, and the flags are
      // killed. Emission is delegated to the macro-assembler helper
      // udivmodI(rax, div, rdx, tmp), defined elsewhere.
 8115 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
 8116                               no_rax_rdx_RegI div, rFlagsReg cr)
 8117 %{
 8118   match(UDivModI rax div);
 8119   effect(TEMP tmp, KILL cr);
 8120 
 8121   ins_cost(300);
 8122   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
 8123             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
 8124           %}
 8125   ins_encode %{
 8126     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
 8127   %}
 8128   ins_pipe(pipe_slow);
 8129 %}
 8130 
 8131 // Unsigned long DIVMOD with Register, both quotient and mod results
      // Matches UDivModL directly (no Set): rax and rdx are both operands, an
      // extra non-rax/rdx register is reserved as TEMP, and the flags are
      // killed. Emission is delegated to the macro-assembler helper
      // udivmodL(rax, div, rdx, tmp), defined elsewhere.
 8132 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
 8133                               no_rax_rdx_RegL div, rFlagsReg cr)
 8134 %{
 8135   match(UDivModL rax div);
 8136   effect(TEMP tmp, KILL cr);
 8137 
 8138   ins_cost(300);
 8139   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
 8140             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
 8141           %}
 8142   ins_encode %{
 8143     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
 8144   %}
 8145   ins_pipe(pipe_slow);
 8146 %}
 8147 
      // Signed int remainder: the dividend is pinned to rax and the result is
      // taken from rdx; rax and the flags are declared killed. Uses the same
      // cdql_enc encoding class as the int divide rule.
 8148 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
 8149                    rFlagsReg cr)
 8150 %{
 8151   match(Set rdx (ModI rax div));
 8152   effect(KILL rax, KILL cr);
 8153 
      // NOTE(review): cost is 300 although divI_rReg, which emits the same
      // encoding, uses 30*100+10*100 - confirm which is intended.
 8154   ins_cost(300); // XXX
 8155   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
 8156             "jne,s   normal\n\t"
 8157             "xorl    rdx, rdx\n\t"
 8158             "cmpl    $div, -1\n\t"
 8159             "je,s    done\n"
 8160     "normal: cdql\n\t"
 8161             "idivl   $div\n"
 8162     "done:"        %}
 8163   ins_encode(cdql_enc(div));
 8164   ins_pipe(ialu_reg_reg_alu0);
 8165 %}
 8166 
      // Signed long remainder: the dividend is pinned to rax and the result is
      // taken from rdx; rax and the flags are declared killed. Uses the same
      // cdqq_enc encoding class as the long divide rule.
 8167 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
 8168                    rFlagsReg cr)
 8169 %{
 8170   match(Set rdx (ModL rax div));
 8171   effect(KILL rax, KILL cr);
 8172 
      // NOTE(review): cost is 300 although divL_rReg, which emits the same
      // encoding, uses 30*100+10*100 - confirm which is intended.
 8173   ins_cost(300); // XXX
 8174   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
 8175             "cmpq    rax, rdx\n\t"
 8176             "jne,s   normal\n\t"
 8177             "xorl    rdx, rdx\n\t"
 8178             "cmpq    $div, -1\n\t"
 8179             "je,s    done\n"
 8180     "normal: cdqq\n\t"
 8181             "idivq   $div\n"
 8182     "done:"        %}
 8183   ins_encode(cdqq_enc(div));
 8184   ins_pipe(ialu_reg_reg_alu0);
 8185 %}
 8186 
      // Unsigned int remainder (UModI): the dividend is pinned to rax and the
      // result is taken from rdx; rax and the flags are declared killed.
      // Emission is delegated to the macro-assembler helper
      // umodI(rax, div, rdx), defined elsewhere.
 8187 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
 8188 %{
 8189   match(Set rdx (UModI rax div));
 8190   effect(KILL rax, KILL cr);
 8191 
 8192   ins_cost(300);
 8193   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
 8194   ins_encode %{
 8195     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
 8196   %}
 8197   ins_pipe(ialu_reg_reg_alu0);
 8198 %}
 8199 
      // Unsigned long remainder (UModL): the dividend is pinned to rax and the
      // result is taken from rdx; rax and the flags are declared killed.
      // Emission is delegated to the macro-assembler helper
      // umodL(rax, div, rdx), defined elsewhere.
 8200 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
 8201 %{
 8202   match(Set rdx (UModL rax div));
 8203   effect(KILL rax, KILL cr);
 8204 
 8205   ins_cost(300);
 8206   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
 8207   ins_encode %{
 8208     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
 8209   %}
 8210   ins_pipe(ialu_reg_reg_alu0);
 8211 %}
 8212 
 8213 // Integer Shift Instructions
 8214 // Shift Left by one, two, three
      // LShiftI of a register by an immI2-class constant: emits
      // sall(dst, constant) in place. Flags are declared killed.
 8215 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
 8216 %{
 8217   match(Set dst (LShiftI dst shift));
 8218   effect(KILL cr);
 8219 
 8220   format %{ "sall    $dst, $shift" %}
 8221   ins_encode %{
 8222     __ sall($dst$$Register, $shift$$constant);
 8223   %}
 8224   ins_pipe(ialu_reg);
 8225 %}
 8226 
 8227 // Shift Left by 8-bit immediate
      // LShiftI of a register by an immI8 constant: emits sall(dst, constant)
      // in place. Flags are declared killed.
 8228 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 8229 %{
 8230   match(Set dst (LShiftI dst shift));
 8231   effect(KILL cr);
 8232 
 8233   format %{ "sall    $dst, $shift" %}
 8234   ins_encode %{
 8235     __ sall($dst$$Register, $shift$$constant);
 8236   %}
 8237   ins_pipe(ialu_reg);
 8238 %}
 8239 
 8240 // Shift Left by 8-bit immediate
      // LShiftI memory read-modify-write: matches a StoreI to dst of
      // (LoadI dst) << constant and emits a single sall([dst], constant).
      // Flags are declared killed.
 8241 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 8242 %{
 8243   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 8244   effect(KILL cr);
 8245 
 8246   format %{ "sall    $dst, $shift" %}
 8247   ins_encode %{
 8248     __ sall($dst$$Address, $shift$$constant);
 8249   %}
 8250   ins_pipe(ialu_mem_imm);
 8251 %}
 8252 
 8253 // Shift Left by variable
      // LShiftI by a variable count when BMI2 is not available (predicate).
      // The count operand is pinned to the rcx_RegI class and the one-operand
      // sall(dst) is emitted, so the count is implicit in the encoding.
      // Flags are declared killed.
 8254 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 8255 %{
 8256   predicate(!VM_Version::supports_bmi2());
 8257   match(Set dst (LShiftI dst shift));
 8258   effect(KILL cr);
 8259 
 8260   format %{ "sall    $dst, $shift" %}
 8261   ins_encode %{
 8262     __ sall($dst$$Register);
 8263   %}
 8264   ins_pipe(ialu_reg_reg);
 8265 %}
 8266 
 8267 // Shift Left by variable
      // LShiftI memory read-modify-write by a variable count when BMI2 is not
      // available (predicate). The count operand is pinned to rcx_RegI and the
      // one-operand sall([dst]) is emitted. Flags are declared killed.
 8268 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 8269 %{
 8270   predicate(!VM_Version::supports_bmi2());
 8271   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 8272   effect(KILL cr);
 8273 
 8274   format %{ "sall    $dst, $shift" %}
 8275   ins_encode %{
 8276     __ sall($dst$$Address);
 8277   %}
 8278   ins_pipe(ialu_mem_reg);
 8279 %}
 8280 
      // LShiftI by a variable count when BMI2 is available (predicate): emits
      // the three-operand shlxl(dst, src, shift). dst, src and the count are
      // unconstrained int registers and no flags effect is declared.
 8281 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 8282 %{
 8283   predicate(VM_Version::supports_bmi2());
 8284   match(Set dst (LShiftI src shift));
 8285 
 8286   format %{ "shlxl   $dst, $src, $shift" %}
 8287   ins_encode %{
 8288     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
 8289   %}
 8290   ins_pipe(ialu_reg_reg);
 8291 %}
 8292 
      // LShiftI of a loaded int by a variable count when BMI2 is available
      // (predicate): folds the LoadI into shlxl(dst, [src], shift). Cost 175;
      // no flags effect is declared.
 8293 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
 8294 %{
 8295   predicate(VM_Version::supports_bmi2());
 8296   match(Set dst (LShiftI (LoadI src) shift));
 8297   ins_cost(175);
 8298   format %{ "shlxl   $dst, $src, $shift" %}
 8299   ins_encode %{
 8300     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
 8301   %}
 8302   ins_pipe(ialu_reg_mem);
 8303 %}
 8304 
 8305 // Arithmetic Shift Right by 8-bit immediate
 8306 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 8307 %{
 8308   match(Set dst (RShiftI dst shift));
 8309   effect(KILL cr);
 8310 
 8311   format %{ "sarl    $dst, $shift" %}
 8312   ins_encode %{
 8313     __ sarl($dst$$Register, $shift$$constant);
 8314   %}
 8315   ins_pipe(ialu_mem_imm);
 8316 %}
 8317 
 8318 // Arithmetic Shift Right by 8-bit immediate
      // RShiftI memory read-modify-write: matches a StoreI to dst of
      // (LoadI dst) >> constant and emits a single sarl([dst], constant).
      // Flags are declared killed.
 8319 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 8320 %{
 8321   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8322   effect(KILL cr);
 8323 
 8324   format %{ "sarl    $dst, $shift" %}
 8325   ins_encode %{
 8326     __ sarl($dst$$Address, $shift$$constant);
 8327   %}
 8328   ins_pipe(ialu_mem_imm);
 8329 %}
 8330 
 8331 // Arithmetic Shift Right by variable
      // RShiftI by a variable count when BMI2 is not available (predicate).
      // The count operand is pinned to the rcx_RegI class and the one-operand
      // sarl(dst) is emitted, so the count is implicit in the encoding.
      // Flags are declared killed.
 8332 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 8333 %{
 8334   predicate(!VM_Version::supports_bmi2());
 8335   match(Set dst (RShiftI dst shift));
 8336   effect(KILL cr);
 8337 
 8338   format %{ "sarl    $dst, $shift" %}
 8339   ins_encode %{
 8340     __ sarl($dst$$Register);
 8341   %}
 8342   ins_pipe(ialu_reg_reg);
 8343 %}
 8344 
 8345 // Arithmetic Shift Right by variable
      // RShiftI memory read-modify-write by a variable count when BMI2 is not
      // available (predicate). The count operand is pinned to rcx_RegI and the
      // one-operand sarl([dst]) is emitted. Flags are declared killed.
 8346 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 8347 %{
 8348   predicate(!VM_Version::supports_bmi2());
 8349   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8350   effect(KILL cr);
 8351 
 8352   format %{ "sarl    $dst, $shift" %}
 8353   ins_encode %{
 8354     __ sarl($dst$$Address);
 8355   %}
 8356   ins_pipe(ialu_mem_reg);
 8357 %}
 8358 
      // RShiftI by a variable count when BMI2 is available (predicate): emits
      // the three-operand sarxl(dst, src, shift). dst, src and the count are
      // unconstrained int registers and no flags effect is declared.
 8359 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 8360 %{
 8361   predicate(VM_Version::supports_bmi2());
 8362   match(Set dst (RShiftI src shift));
 8363 
 8364   format %{ "sarxl   $dst, $src, $shift" %}
 8365   ins_encode %{
 8366     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
 8367   %}
 8368   ins_pipe(ialu_reg_reg);
 8369 %}
 8370 
      // RShiftI of a loaded int by a variable count when BMI2 is available
      // (predicate): folds the LoadI into sarxl(dst, [src], shift). Cost 175;
      // no flags effect is declared.
 8371 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
 8372 %{
 8373   predicate(VM_Version::supports_bmi2());
 8374   match(Set dst (RShiftI (LoadI src) shift));
 8375   ins_cost(175);
 8376   format %{ "sarxl   $dst, $src, $shift" %}
 8377   ins_encode %{
 8378     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
 8379   %}
 8380   ins_pipe(ialu_reg_mem);
 8381 %}
 8382 
 8383 // Logical Shift Right by 8-bit immediate
      // URShiftI of a register by an immI8 constant: emits shrl(dst, constant)
      // in place. Flags are declared killed.
 8384 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 8385 %{
 8386   match(Set dst (URShiftI dst shift));
 8387   effect(KILL cr);
 8388 
 8389   format %{ "shrl    $dst, $shift" %}
 8390   ins_encode %{
 8391     __ shrl($dst$$Register, $shift$$constant);
 8392   %}
 8393   ins_pipe(ialu_reg);
 8394 %}
 8395 
 8396 // Logical Shift Right by 8-bit immediate
      // URShiftI memory read-modify-write: matches a StoreI to dst of
      // (LoadI dst) >>> constant and emits a single shrl([dst], constant).
      // Flags are declared killed.
 8397 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 8398 %{
 8399   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 8400   effect(KILL cr);
 8401 
 8402   format %{ "shrl    $dst, $shift" %}
 8403   ins_encode %{
 8404     __ shrl($dst$$Address, $shift$$constant);
 8405   %}
 8406   ins_pipe(ialu_mem_imm);
 8407 %}
 8408 
 8409 // Logical Shift Right by variable
      // URShiftI by a variable count when BMI2 is not available (predicate).
      // The count operand is pinned to the rcx_RegI class and the one-operand
      // shrl(dst) is emitted, so the count is implicit in the encoding.
      // Flags are declared killed.
 8410 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 8411 %{
 8412   predicate(!VM_Version::supports_bmi2());
 8413   match(Set dst (URShiftI dst shift));
 8414   effect(KILL cr);
 8415 
 8416   format %{ "shrl    $dst, $shift" %}
 8417   ins_encode %{
 8418     __ shrl($dst$$Register);
 8419   %}
 8420   ins_pipe(ialu_reg_reg);
 8421 %}
 8422 
 8423 // Logical Shift Right by variable
      // URShiftI memory read-modify-write by a variable count when BMI2 is not
      // available (predicate). The count operand is pinned to rcx_RegI and the
      // one-operand shrl([dst]) is emitted. Flags are declared killed.
 8424 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 8425 %{
 8426   predicate(!VM_Version::supports_bmi2());
 8427   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 8428   effect(KILL cr);
 8429 
 8430   format %{ "shrl    $dst, $shift" %}
 8431   ins_encode %{
 8432     __ shrl($dst$$Address);
 8433   %}
 8434   ins_pipe(ialu_mem_reg);
 8435 %}
 8436 
      // URShiftI by a variable count when BMI2 is available (predicate): emits
      // the three-operand shrxl(dst, src, shift). dst, src and the count are
      // unconstrained int registers and no flags effect is declared.
 8437 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 8438 %{
 8439   predicate(VM_Version::supports_bmi2());
 8440   match(Set dst (URShiftI src shift));
 8441 
 8442   format %{ "shrxl   $dst, $src, $shift" %}
 8443   ins_encode %{
 8444     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
 8445   %}
 8446   ins_pipe(ialu_reg_reg);
 8447 %}
 8448 
      // URShiftI of a loaded int by a variable count when BMI2 is available
      // (predicate): folds the LoadI into shrxl(dst, [src], shift). Cost 175;
      // no flags effect is declared.
 8449 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
 8450 %{
 8451   predicate(VM_Version::supports_bmi2());
 8452   match(Set dst (URShiftI (LoadI src) shift));
 8453   ins_cost(175);
 8454   format %{ "shrxl   $dst, $src, $shift" %}
 8455   ins_encode %{
 8456     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
 8457   %}
 8458   ins_pipe(ialu_reg_mem);
 8459 %}
 8460 
 8461 // Long Shift Instructions
 8462 // Shift Left by one, two, three
      // LShiftL of a register by an immI2-class constant: emits
      // salq(dst, constant) in place. Flags are declared killed.
 8463 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
 8464 %{
 8465   match(Set dst (LShiftL dst shift));
 8466   effect(KILL cr);
 8467 
 8468   format %{ "salq    $dst, $shift" %}
 8469   ins_encode %{
 8470     __ salq($dst$$Register, $shift$$constant);
 8471   %}
 8472   ins_pipe(ialu_reg);
 8473 %}
 8474 
 8475 // Shift Left by 8-bit immediate
      // LShiftL of a register by an immI8 constant: emits salq(dst, constant)
      // in place. Flags are declared killed.
 8476 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 8477 %{
 8478   match(Set dst (LShiftL dst shift));
 8479   effect(KILL cr);
 8480 
 8481   format %{ "salq    $dst, $shift" %}
 8482   ins_encode %{
 8483     __ salq($dst$$Register, $shift$$constant);
 8484   %}
 8485   ins_pipe(ialu_reg);
 8486 %}
 8487 
 8488 // Shift Left by 8-bit immediate
      // LShiftL memory read-modify-write: matches a StoreL to dst of
      // (LoadL dst) << constant and emits a single salq([dst], constant).
      // Flags are declared killed.
 8489 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 8490 %{
 8491   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 8492   effect(KILL cr);
 8493 
 8494   format %{ "salq    $dst, $shift" %}
 8495   ins_encode %{
 8496     __ salq($dst$$Address, $shift$$constant);
 8497   %}
 8498   ins_pipe(ialu_mem_imm);
 8499 %}
 8500 
 8501 // Shift Left by variable
      // LShiftL by a variable count when BMI2 is not available (predicate).
      // The count operand is pinned to the rcx_RegI class and the one-operand
      // salq(dst) is emitted, so the count is implicit in the encoding.
      // Flags are declared killed.
 8502 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 8503 %{
 8504   predicate(!VM_Version::supports_bmi2());
 8505   match(Set dst (LShiftL dst shift));
 8506   effect(KILL cr);
 8507 
 8508   format %{ "salq    $dst, $shift" %}
 8509   ins_encode %{
 8510     __ salq($dst$$Register);
 8511   %}
 8512   ins_pipe(ialu_reg_reg);
 8513 %}
 8514 
 8515 // Shift Left by variable
      // LShiftL memory read-modify-write by a variable count when BMI2 is not
      // available (predicate). The count operand is pinned to rcx_RegI and the
      // one-operand salq([dst]) is emitted. Flags are declared killed.
 8516 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 8517 %{
 8518   predicate(!VM_Version::supports_bmi2());
 8519   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 8520   effect(KILL cr);
 8521 
 8522   format %{ "salq    $dst, $shift" %}
 8523   ins_encode %{
 8524     __ salq($dst$$Address);
 8525   %}
 8526   ins_pipe(ialu_mem_reg);
 8527 %}
 8528 
      // LShiftL by a variable count when BMI2 is available (predicate): emits
      // the three-operand shlxq(dst, src, shift). The count is an int register;
      // no flags effect is declared.
 8529 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 8530 %{
 8531   predicate(VM_Version::supports_bmi2());
 8532   match(Set dst (LShiftL src shift));
 8533 
 8534   format %{ "shlxq   $dst, $src, $shift" %}
 8535   ins_encode %{
 8536     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
 8537   %}
 8538   ins_pipe(ialu_reg_reg);
 8539 %}
 8540 
      // LShiftL of a loaded long by a variable count when BMI2 is available
      // (predicate): folds the LoadL into shlxq(dst, [src], shift). Cost 175;
      // no flags effect is declared.
 8541 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
 8542 %{
 8543   predicate(VM_Version::supports_bmi2());
 8544   match(Set dst (LShiftL (LoadL src) shift));
 8545   ins_cost(175);
 8546   format %{ "shlxq   $dst, $src, $shift" %}
 8547   ins_encode %{
 8548     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
 8549   %}
 8550   ins_pipe(ialu_reg_mem);
 8551 %}
 8552 
 8553 // Arithmetic Shift Right by immediate (immI constant, masked to 6 bits)
      // RShiftL of a register by a constant: emits sarq(dst, count) in place,
      // where the encoder reduces the constant to its low 6 bits (& 0x3F)
      // before passing it on. Flags are declared killed.
      // Uses the register pipe class ialu_reg, matching the sibling
      // register/immediate rule salL_rReg_imm; this rule has no memory operand.
 8554 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
 8555 %{
 8556   match(Set dst (RShiftL dst shift));
 8557   effect(KILL cr);
 8558 
 8559   format %{ "sarq    $dst, $shift" %}
 8560   ins_encode %{
 8561     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
 8562   %}
 8563   ins_pipe(ialu_reg);
 8564 %}
 8565 
 8566 // Arithmetic Shift Right by immediate (immI constant, masked to 6 bits)
      // RShiftL memory read-modify-write: matches a StoreL to dst of
      // (LoadL dst) >> constant and emits a single sarq([dst], count), where
      // the encoder reduces the constant to its low 6 bits (& 0x3F).
      // Flags are declared killed.
 8567 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
 8568 %{
 8569   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 8570   effect(KILL cr);
 8571 
 8572   format %{ "sarq    $dst, $shift" %}
 8573   ins_encode %{
 8574     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
 8575   %}
 8576   ins_pipe(ialu_mem_imm);
 8577 %}
 8578 
 8579 // Arithmetic Shift Right by variable
      // In-place arithmetic right shift of a long register, selected only when BMI2
      // is not supported. The count operand is constrained to the rcx_RegI class
      // and the flags register is declared killed.
 8580 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 8581 %{
 8582   predicate(!VM_Version::supports_bmi2());
 8583   match(Set dst (RShiftL dst shift));
 8584   effect(KILL cr);
 8585 
 8586   format %{ "sarq    $dst, $shift" %}
 8587   ins_encode %{
          // Count is not passed explicitly (presumably CL, given the rcx_RegI operand).
 8588     __ sarq($dst$$Register);
 8589   %}
 8590   ins_pipe(ialu_reg_reg);
 8591 %}
 8592 
 8593 // Arithmetic Shift Right by variable
      // Read-modify-write form: matches a StoreL to $dst of ((LoadL $dst) >> $shift).
      // Only selected when BMI2 is not supported; the count operand is constrained
      // to the rcx_RegI class and the flags register is declared killed.
 8594 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 8595 %{
 8596   predicate(!VM_Version::supports_bmi2());
 8597   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 8598   effect(KILL cr);
 8599 
 8600   format %{ "sarq    $dst, $shift" %}
 8601   ins_encode %{
          // Count is not passed explicitly (presumably CL, given the rcx_RegI operand).
 8602     __ sarq($dst$$Address);
 8603   %}
 8604   ins_pipe(ialu_mem_reg);
 8605 %}
 8606 
 8607 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 8608 %{
        // BMI2 variable arithmetic right shift of a long, emitted as three-operand
        // sarxq. The count is a plain rRegI (not pinned to rcx); no rFlagsReg
        // operand or KILL effect is declared.
 8609   predicate(VM_Version::supports_bmi2());
 8610   match(Set dst (RShiftL src shift));
 8611 
 8612   format %{ "sarxq   $dst, $src, $shift" %}
 8613   ins_encode %{
 8614     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
 8615   %}
 8616   ins_pipe(ialu_reg_reg);
 8617 %}
 8618 
 8619 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
 8620 %{
        // BMI2 variable arithmetic right shift of a long loaded from memory:
        // folds the LoadL into sarxq. Costed at 175; no flags effect is declared.
 8621   predicate(VM_Version::supports_bmi2());
 8622   match(Set dst (RShiftL (LoadL src) shift));
 8623   ins_cost(175);
 8624   format %{ "sarxq   $dst, $src, $shift" %}
 8625   ins_encode %{
 8626     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
 8627   %}
 8628   ins_pipe(ialu_reg_mem);
 8629 %}
 8630 
 8631 // Logical Shift Right by 8-bit immediate
      // In-place unsigned right shift of a long register by an immI8 constant.
      // The constant is passed to the assembler unmasked; the flags register is
      // declared killed.
 8632 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 8633 %{
 8634   match(Set dst (URShiftL dst shift));
 8635   effect(KILL cr);
 8636 
 8637   format %{ "shrq    $dst, $shift" %}
 8638   ins_encode %{
 8639     __ shrq($dst$$Register, $shift$$constant);
 8640   %}
 8641   ins_pipe(ialu_reg);
 8642 %}
 8643 
 8644 // Logical Shift Right by 8-bit immediate
      // Read-modify-write form: matches a StoreL to $dst of ((LoadL $dst) >>> $shift)
      // with an immI8 count. The flags register is declared killed.
 8645 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 8646 %{
 8647   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 8648   effect(KILL cr);
 8649 
 8650   format %{ "shrq    $dst, $shift" %}
 8651   ins_encode %{
 8652     __ shrq($dst$$Address, $shift$$constant);
 8653   %}
 8654   ins_pipe(ialu_mem_imm);
 8655 %}
 8656 
 8657 // Logical Shift Right by variable
      // In-place unsigned right shift of a long register, selected only when BMI2
      // is not supported. The count operand is constrained to the rcx_RegI class
      // and the flags register is declared killed.
 8658 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 8659 %{
 8660   predicate(!VM_Version::supports_bmi2());
 8661   match(Set dst (URShiftL dst shift));
 8662   effect(KILL cr);
 8663 
 8664   format %{ "shrq    $dst, $shift" %}
 8665   ins_encode %{
          // Count is not passed explicitly (presumably CL, given the rcx_RegI operand).
 8666     __ shrq($dst$$Register);
 8667   %}
 8668   ins_pipe(ialu_reg_reg);
 8669 %}
 8670 
 8671 // Logical Shift Right by variable
      // Read-modify-write form: matches a StoreL to $dst of ((LoadL $dst) >>> $shift).
      // Only selected when BMI2 is not supported; the count operand is constrained
      // to the rcx_RegI class and the flags register is declared killed.
 8672 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 8673 %{
 8674   predicate(!VM_Version::supports_bmi2());
 8675   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 8676   effect(KILL cr);
 8677 
 8678   format %{ "shrq    $dst, $shift" %}
 8679   ins_encode %{
          // Count is not passed explicitly (presumably CL, given the rcx_RegI operand).
 8680     __ shrq($dst$$Address);
 8681   %}
 8682   ins_pipe(ialu_mem_reg);
 8683 %}
 8684 
 8685 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 8686 %{
        // BMI2 variable unsigned right shift of a long, emitted as three-operand
        // shrxq. The count is a plain rRegI (not pinned to rcx); no rFlagsReg
        // operand or KILL effect is declared.
 8687   predicate(VM_Version::supports_bmi2());
 8688   match(Set dst (URShiftL src shift));
 8689 
 8690   format %{ "shrxq   $dst, $src, $shift" %}
 8691   ins_encode %{
 8692     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
 8693   %}
 8694   ins_pipe(ialu_reg_reg);
 8695 %}
 8696 
 8697 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
 8698 %{
        // BMI2 variable unsigned right shift of a long loaded from memory:
        // folds the LoadL into shrxq. Costed at 175; no flags effect is declared.
 8699   predicate(VM_Version::supports_bmi2());
 8700   match(Set dst (URShiftL (LoadL src) shift));
 8701   ins_cost(175);
 8702   format %{ "shrxq   $dst, $src, $shift" %}
 8703   ins_encode %{
 8704     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
 8705   %}
 8706   ins_pipe(ialu_reg_mem);
 8707 %}
 8708 
 8709 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8710 // This idiom is used by the compiler for the i2b bytecode.
      // The whole (src << 24) >> 24 pattern is replaced by a single sign-extending
      // byte move; no flags operand is declared.
 8711 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
 8712 %{
 8713   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8714 
 8715   format %{ "movsbl  $dst, $src\t# i2b" %}
 8716   ins_encode %{
 8717     __ movsbl($dst$$Register, $src$$Register);
 8718   %}
 8719   ins_pipe(ialu_reg_reg);
 8720 %}
 8721 
 8722 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 8723 // This idiom is used by the compiler for the i2s bytecode.
      // The whole (src << 16) >> 16 pattern is replaced by a single sign-extending
      // 16-bit move; no flags operand is declared.
 8724 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
 8725 %{
 8726   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8727 
 8728   format %{ "movswl  $dst, $src\t# i2s" %}
 8729   ins_encode %{
 8730     __ movswl($dst$$Register, $src$$Register);
 8731   %}
 8732   ins_pipe(ialu_reg_reg);
 8733 %}
 8734 
 8735 // ROL/ROR instructions
 8736 
 8737 // Rotate left by constant.
      // Non-BMI2 path for int rotates: in-place roll with an immI8 count.
      // RotateLeft is shared between int and long, so the predicate also checks
      // that the node's type is T_INT. The flags register is declared killed.
 8738 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
 8739 %{
 8740   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 8741   match(Set dst (RotateLeft dst shift));
 8742   effect(KILL cr);
 8743   format %{ "roll    $dst, $shift" %}
 8744   ins_encode %{
 8745     __ roll($dst$$Register, $shift$$constant);
 8746   %}
 8747   ins_pipe(ialu_reg);
 8748 %}
 8749 
 8750 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
 8751 %{
        // BMI2 path for int rotate-left by constant. The rule emits rorxl, so the
        // left rotation is expressed as a right rotation by 32 - (count mod 32).
        // The format string prints "rolxl" with the original count.
 8752   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 8753   match(Set dst (RotateLeft src shift));
 8754   format %{ "rolxl   $dst, $src, $shift" %}
 8755   ins_encode %{
          // NOTE(review): when (count & 31) == 0 the immediate passed is 32 —
          // presumably reduced modulo the operand size by the hardware; confirm.
 8756     int shift = 32 - ($shift$$constant & 31);
 8757     __ rorxl($dst$$Register, $src$$Register, shift);
 8758   %}
 8759   ins_pipe(ialu_reg_reg);
 8760 %}
 8761 
 8762 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
 8763 %{
        // BMI2 path for int rotate-left by constant with the source loaded from
        // memory: folds the LoadI into rorxl, rotating right by 32 - (count mod 32).
        // Costed at 175.
 8764   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 8765   match(Set dst (RotateLeft (LoadI src) shift));
 8766   ins_cost(175);
 8767   format %{ "rolxl   $dst, $src, $shift" %}
 8768   ins_encode %{
          // NOTE(review): when (count & 31) == 0 the immediate passed is 32 —
          // presumably reduced modulo the operand size by the hardware; confirm.
 8769     int shift = 32 - ($shift$$constant & 31);
 8770     __ rorxl($dst$$Register, $src$$Address, shift);
 8771   %}
 8772   ins_pipe(ialu_reg_mem);
 8773 %}
 8774 
 8775 // Rotate Left by variable
      // In-place int rotate-left by a register count. The predicate checks only the
      // node type (T_INT), so this rule applies with or without BMI2. The count
      // operand is constrained to the rcx_RegI class; flags are declared killed.
 8776 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 8777 %{
 8778   predicate(n->bottom_type()->basic_type() == T_INT);
 8779   match(Set dst (RotateLeft dst shift));
 8780   effect(KILL cr);
 8781   format %{ "roll    $dst, $shift" %}
 8782   ins_encode %{
 8783     __ roll($dst$$Register);
 8784   %}
 8785   ins_pipe(ialu_reg_reg);
 8786 %}
 8787 
 8788 // Rotate Right by constant.
      // Non-BMI2 path for int rotates: in-place rorl with an immI8 count.
      // The predicate also requires the node's type to be T_INT. The flags
      // register is declared killed.
 8789 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
 8790 %{
 8791   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 8792   match(Set dst (RotateRight dst shift));
 8793   effect(KILL cr);
 8794   format %{ "rorl    $dst, $shift" %}
 8795   ins_encode %{
 8796     __ rorl($dst$$Register, $shift$$constant);
 8797   %}
 8798   ins_pipe(ialu_reg);
 8799 %}
 8800 
 8801 // Rotate Right by constant.
      // BMI2 path for int rotates: three-operand rorxl with the immI8 count passed
      // through unchanged. No rFlagsReg operand or KILL effect is declared.
 8802 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
 8803 %{
 8804   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 8805   match(Set dst (RotateRight src shift));
 8806   format %{ "rorxl   $dst, $src, $shift" %}
 8807   ins_encode %{
 8808     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
 8809   %}
 8810   ins_pipe(ialu_reg_reg);
 8811 %}
 8812 
 8813 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
 8814 %{
        // BMI2 path for int rotate-right by constant with the source loaded from
        // memory: folds the LoadI into rorxl. Costed at 175; no flags effect declared.
 8815   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 8816   match(Set dst (RotateRight (LoadI src) shift));
 8817   ins_cost(175);
 8818   format %{ "rorxl   $dst, $src, $shift" %}
 8819   ins_encode %{
 8820     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
 8821   %}
 8822   ins_pipe(ialu_reg_mem);
 8823 %}
 8824 
 8825 // Rotate Right by variable
      // In-place int rotate-right by a register count. The predicate checks only
      // the node type (T_INT), so this rule applies with or without BMI2. The count
      // operand is constrained to the rcx_RegI class; flags are declared killed.
 8826 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 8827 %{
 8828   predicate(n->bottom_type()->basic_type() == T_INT);
 8829   match(Set dst (RotateRight dst shift));
 8830   effect(KILL cr);
 8831   format %{ "rorl    $dst, $shift" %}
 8832   ins_encode %{
 8833     __ rorl($dst$$Register);
 8834   %}
 8835   ins_pipe(ialu_reg_reg);
 8836 %}
 8837 
 8838 // Rotate Left by constant.
      // Non-BMI2 path for long rotates: in-place rolq with an immI8 count.
      // The predicate also requires the node's type to be T_LONG. The flags
      // register is declared killed.
 8839 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
 8840 %{
 8841   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 8842   match(Set dst (RotateLeft dst shift));
 8843   effect(KILL cr);
 8844   format %{ "rolq    $dst, $shift" %}
 8845   ins_encode %{
 8846     __ rolq($dst$$Register, $shift$$constant);
 8847   %}
 8848   ins_pipe(ialu_reg);
 8849 %}
 8850 
 8851 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
 8852 %{
        // BMI2 path for long rotate-left by constant. The rule emits rorxq, so the
        // left rotation is expressed as a right rotation by 64 - (count mod 64).
        // The format string prints "rolxq" with the original count.
 8853   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 8854   match(Set dst (RotateLeft src shift));
 8855   format %{ "rolxq   $dst, $src, $shift" %}
 8856   ins_encode %{
          // NOTE(review): when (count & 63) == 0 the immediate passed is 64 —
          // presumably reduced modulo the operand size by the hardware; confirm.
 8857     int shift = 64 - ($shift$$constant & 63);
 8858     __ rorxq($dst$$Register, $src$$Register, shift);
 8859   %}
 8860   ins_pipe(ialu_reg_reg);
 8861 %}
 8862 
 8863 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
 8864 %{
        // BMI2 path for long rotate-left by constant with the source loaded from
        // memory: folds the LoadL into rorxq, rotating right by 64 - (count mod 64).
        // Costed at 175.
 8865   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 8866   match(Set dst (RotateLeft (LoadL src) shift));
 8867   ins_cost(175);
 8868   format %{ "rolxq   $dst, $src, $shift" %}
 8869   ins_encode %{
          // NOTE(review): when (count & 63) == 0 the immediate passed is 64 —
          // presumably reduced modulo the operand size by the hardware; confirm.
 8870     int shift = 64 - ($shift$$constant & 63);
 8871     __ rorxq($dst$$Register, $src$$Address, shift);
 8872   %}
 8873   ins_pipe(ialu_reg_mem);
 8874 %}
 8875 
 8876 // Rotate Left by variable
      // In-place long rotate-left by a register count. The predicate checks only
      // the node type (T_LONG), so this rule applies with or without BMI2. The count
      // operand is constrained to the rcx_RegI class; flags are declared killed.
 8877 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 8878 %{
 8879   predicate(n->bottom_type()->basic_type() == T_LONG);
 8880   match(Set dst (RotateLeft dst shift));
 8881   effect(KILL cr);
 8882   format %{ "rolq    $dst, $shift" %}
 8883   ins_encode %{
 8884     __ rolq($dst$$Register);
 8885   %}
 8886   ins_pipe(ialu_reg_reg);
 8887 %}
 8888 
 8889 // Rotate Right by constant.
      // Non-BMI2 path for long rotates: in-place rorq with an immI8 count.
      // The predicate also requires the node's type to be T_LONG. The flags
      // register is declared killed.
 8890 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
 8891 %{
 8892   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 8893   match(Set dst (RotateRight dst shift));
 8894   effect(KILL cr);
 8895   format %{ "rorq    $dst, $shift" %}
 8896   ins_encode %{
 8897     __ rorq($dst$$Register, $shift$$constant);
 8898   %}
 8899   ins_pipe(ialu_reg);
 8900 %}
 8901 
 8902 // Rotate Right by constant
      // BMI2 path for long rotates: three-operand rorxq with the immI8 count passed
      // through unchanged. No rFlagsReg operand or KILL effect is declared.
 8903 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
 8904 %{
 8905   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 8906   match(Set dst (RotateRight src shift));
 8907   format %{ "rorxq   $dst, $src, $shift" %}
 8908   ins_encode %{
 8909     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
 8910   %}
 8911   ins_pipe(ialu_reg_reg);
 8912 %}
 8913 
 8914 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
 8915 %{
        // BMI2 path for long rotate-right by constant with the source loaded from
        // memory: folds the LoadL into rorxq. Costed at 175; no flags effect declared.
 8916   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 8917   match(Set dst (RotateRight (LoadL src) shift));
 8918   ins_cost(175);
 8919   format %{ "rorxq   $dst, $src, $shift" %}
 8920   ins_encode %{
 8921     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
 8922   %}
 8923   ins_pipe(ialu_reg_mem);
 8924 %}
 8925 
 8926 // Rotate Right by variable
      // In-place long rotate-right by a register count. The predicate checks only
      // the node type (T_LONG), so this rule applies with or without BMI2. The count
      // operand is constrained to the rcx_RegI class; flags are declared killed.
 8927 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 8928 %{
 8929   predicate(n->bottom_type()->basic_type() == T_LONG);
 8930   match(Set dst (RotateRight dst shift));
 8931   effect(KILL cr);
 8932   format %{ "rorq    $dst, $shift" %}
 8933   ins_encode %{
 8934     __ rorq($dst$$Register);
 8935   %}
 8936   ins_pipe(ialu_reg_reg);
 8937 %}
 8938 
 8939 //----------------------------- CompressBits/ExpandBits ------------------------
 8940 
 8941 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
        // Long CompressBits with a register mask, emitted as pextq. The predicate
        // restricts the rule to nodes whose type is a long. No flags operand is
        // declared. NOTE(review): no CPU-feature predicate here — presumably the
        // node is only created when the instruction is available; confirm.
 8942   predicate(n->bottom_type()->isa_long());
 8943   match(Set dst (CompressBits src mask));
 8944   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
 8945   ins_encode %{
 8946     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
 8947   %}
 8948   ins_pipe( pipe_slow );
 8949 %}
 8950 
 8951 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
        // Long ExpandBits with a register mask, emitted as pdepq. The predicate
        // restricts the rule to nodes whose type is a long. No flags operand is
        // declared.
 8952   predicate(n->bottom_type()->isa_long());
 8953   match(Set dst (ExpandBits src mask));
 8954   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
 8955   ins_encode %{
 8956     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
 8957   %}
 8958   ins_pipe( pipe_slow );
 8959 %}
 8960 
 8961 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
        // Long CompressBits whose mask is loaded from memory: folds the LoadL of the
        // mask into pextq. Restricted to nodes whose type is a long.
 8962   predicate(n->bottom_type()->isa_long());
 8963   match(Set dst (CompressBits src (LoadL mask)));
 8964   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
 8965   ins_encode %{
 8966     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
 8967   %}
 8968   ins_pipe( pipe_slow );
 8969 %}
 8970 
 8971 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
        // Long ExpandBits whose mask is loaded from memory: folds the LoadL of the
        // mask into pdepq. Restricted to nodes whose type is a long.
 8972   predicate(n->bottom_type()->isa_long());
 8973   match(Set dst (ExpandBits src (LoadL mask)));
 8974   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
 8975   ins_encode %{
 8976     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
 8977   %}
 8978   ins_pipe( pipe_slow );
 8979 %}
 8980 
 8981 
 8982 // Logical Instructions
 8983 
 8984 // Integer Logical Instructions
 8985 
 8986 // And Instructions
 8987 // And Register with Register
      // In-place int AND: dst = dst & src. The flags register is declared killed,
      // and the flag(...) list records the declared effect on each status flag
      // (sign/zero/parity set from the result, overflow/carry cleared).
 8988 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8989 %{
 8990   match(Set dst (AndI dst src));
 8991   effect(KILL cr);
 8992   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 8993 
 8994   format %{ "andl    $dst, $src\t# int" %}
 8995   ins_encode %{
 8996     __ andl($dst$$Register, $src$$Register);
 8997   %}
 8998   ins_pipe(ialu_reg_reg);
 8999 %}
 9000 
 9001 // And Register with Immediate 255
      // src & 0xFF is emitted as a zero-extending byte move instead of an AND, so
      // dst may differ from src and no flags operand is declared.
 9002 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
 9003 %{
 9004   match(Set dst (AndI src mask));
 9005 
 9006   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
 9007   ins_encode %{
 9008     __ movzbl($dst$$Register, $src$$Register);
 9009   %}
 9010   ins_pipe(ialu_reg);
 9011 %}
 9012 
 9013 // And Register with Immediate 255 and promote to long
      // Matches ConvI2L of (src & 0xFF) and emits a single zero-extending byte move
      // into the long destination; no separate conversion and no flags operand.
 9014 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
 9015 %{
 9016   match(Set dst (ConvI2L (AndI src mask)));
 9017 
 9018   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
 9019   ins_encode %{
 9020     __ movzbl($dst$$Register, $src$$Register);
 9021   %}
 9022   ins_pipe(ialu_reg);
 9023 %}
 9024 
 9025 // And Register with Immediate 65535
      // src & 0xFFFF is emitted as a zero-extending 16-bit move instead of an AND,
      // so dst may differ from src and no flags operand is declared.
 9026 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
 9027 %{
 9028   match(Set dst (AndI src mask));
 9029 
 9030   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
 9031   ins_encode %{
 9032     __ movzwl($dst$$Register, $src$$Register);
 9033   %}
 9034   ins_pipe(ialu_reg);
 9035 %}
 9036 
 9037 // And Register with Immediate 65535 and promote to long
      // Matches ConvI2L of (src & 0xFFFF) and emits a single zero-extending 16-bit
      // move into the long destination; no separate conversion and no flags operand.
 9038 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
 9039 %{
 9040   match(Set dst (ConvI2L (AndI src mask)));
 9041 
 9042   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
 9043   ins_encode %{
 9044     __ movzwl($dst$$Register, $src$$Register);
 9045   %}
 9046   ins_pipe(ialu_reg);
 9047 %}
 9048 
 9049 // Can skip int2long conversions after AND with small bitmask
      // BMI2-only rule for ConvI2L of (src & mask) where mask is an immI_Pow2M1
      // constant. Emits two instructions: the bit count exact_log2(mask + 1) is
      // loaded into the TEMP register, then bzhiq uses it as the bit index.
      // Costed at 125; flags are declared killed.
 9050 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
 9051 %{
 9052   predicate(VM_Version::supports_bmi2());
 9053   ins_cost(125);
 9054   effect(TEMP tmp, KILL cr);
 9055   match(Set dst (ConvI2L (AndI src mask)));
 9056   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
 9057   ins_encode %{
          // The format above prints only the bzhiq; the movl into $tmp is emitted too.
 9058     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
 9059     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
 9060   %}
 9061   ins_pipe(ialu_reg_reg);
 9062 %}
 9063 
 9064 // And Register with Immediate
      // In-place int AND with an arbitrary int constant: dst = dst & src.
      // Flags are declared killed; flag(...) lists the declared status-flag effects.
 9065 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9066 %{
 9067   match(Set dst (AndI dst src));
 9068   effect(KILL cr);
 9069   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9070 
 9071   format %{ "andl    $dst, $src\t# int" %}
 9072   ins_encode %{
 9073     __ andl($dst$$Register, $src$$constant);
 9074   %}
 9075   ins_pipe(ialu_reg);
 9076 %}
 9077 
 9078 // And Register with Memory
      // In-place int AND with a memory operand: folds the LoadI into andl.
      // Costed at 150; flags are declared killed with the listed flag effects.
 9079 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9080 %{
 9081   match(Set dst (AndI dst (LoadI src)));
 9082   effect(KILL cr);
 9083   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9084 
 9085   ins_cost(150);
 9086   format %{ "andl    $dst, $src\t# int" %}
 9087   ins_encode %{
 9088     __ andl($dst$$Register, $src$$Address);
 9089   %}
 9090   ins_pipe(ialu_reg_mem);
 9091 %}
 9092 
 9093 // And Memory with Register
      // Byte read-modify-write: matches a StoreB to $dst of ((LoadB $dst) & src) and
      // emits a single andb on the memory operand. Costed at 150; flags are
      // declared killed with the listed flag effects.
 9094 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9095 %{
 9096   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
 9097   effect(KILL cr);
 9098   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9099 
 9100   ins_cost(150);
 9101   format %{ "andb    $dst, $src\t# byte" %}
 9102   ins_encode %{
 9103     __ andb($dst$$Address, $src$$Register);
 9104   %}
 9105   ins_pipe(ialu_mem_reg);
 9106 %}
 9107 
 9108 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9109 %{
        // Int read-modify-write: matches a StoreI to $dst of ((LoadI $dst) & src) and
        // emits a single andl on the memory operand. Costed at 150; flags are
        // declared killed with the listed flag effects.
 9110   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 9111   effect(KILL cr);
 9112   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9113 
 9114   ins_cost(150);
 9115   format %{ "andl    $dst, $src\t# int" %}
 9116   ins_encode %{
 9117     __ andl($dst$$Address, $src$$Register);
 9118   %}
 9119   ins_pipe(ialu_mem_reg);
 9120 %}
 9121 
 9122 // And Memory with Immediate
      // Int read-modify-write with a constant: matches a StoreI to $dst of
      // ((LoadI $dst) & src). Costed at 125 (lower than the register form's 150);
      // flags are declared killed with the listed flag effects.
 9123 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9124 %{
 9125   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 9126   effect(KILL cr);
 9127   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9128 
 9129   ins_cost(125);
 9130   format %{ "andl    $dst, $src\t# int" %}
 9131   ins_encode %{
 9132     __ andl($dst$$Address, $src$$constant);
 9133   %}
 9134   ins_pipe(ialu_mem_imm);
 9135 %}
 9136 
 9137 // BMI1 instructions
 9138 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
        // And-not with a memory operand: (src1 ^ -1) & load(src2), i.e.
        // ~src1 & [src2], emitted as one andnl. Enabled by UseBMI1Instructions.
        // Costed at 125; flags are declared killed. The declared flag effects
        // list sign/zero/overflow/carry but no parity entry.
 9139   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
 9140   predicate(UseBMI1Instructions);
 9141   effect(KILL cr);
 9142   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9143 
 9144   ins_cost(125);
 9145   format %{ "andnl  $dst, $src1, $src2" %}
 9146 
 9147   ins_encode %{
 9148     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 9149   %}
 9150   ins_pipe(ialu_reg_mem);
 9151 %}
 9152 
 9153 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
        // And-not, register form: (src1 ^ -1) & src2, i.e. ~src1 & src2, emitted as
        // one three-operand andnl. Enabled by UseBMI1Instructions; flags are
        // declared killed with the listed flag effects.
 9154   match(Set dst (AndI (XorI src1 minus_1) src2));
 9155   predicate(UseBMI1Instructions);
 9156   effect(KILL cr);
 9157   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9158 
 9159   format %{ "andnl  $dst, $src1, $src2" %}
 9160 
 9161   ins_encode %{
 9162     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 9163   %}
 9164   ins_pipe(ialu_reg);
 9165 %}
 9166 
 9167 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
        // Isolate lowest set bit, register form: matches (0 - src) & src and emits
        // one blsil. Enabled by UseBMI1Instructions; flags are declared killed.
        // The declared flag effects cover sign/zero/overflow only (no carry entry).
 9168   match(Set dst (AndI (SubI imm_zero src) src));
 9169   predicate(UseBMI1Instructions);
 9170   effect(KILL cr);
 9171   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
 9172 
 9173   format %{ "blsil  $dst, $src" %}
 9174 
 9175   ins_encode %{
 9176     __ blsil($dst$$Register, $src$$Register);
 9177   %}
 9178   ins_pipe(ialu_reg);
 9179 %}
 9180 
 9181 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
        // Isolate lowest set bit, memory form: matches (0 - load(src)) & load(src),
        // with both loads naming the same memory operand, and emits one blsil on
        // that address. Enabled by UseBMI1Instructions; costed at 125; flags killed.
 9182   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 9183   predicate(UseBMI1Instructions);
 9184   effect(KILL cr);
 9185   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
 9186 
 9187   ins_cost(125);
 9188   format %{ "blsil  $dst, $src" %}
 9189 
 9190   ins_encode %{
 9191     __ blsil($dst$$Register, $src$$Address);
 9192   %}
 9193   ins_pipe(ialu_reg_mem);
 9194 %}
 9195 
 9196 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
 9197 %{
        // Mask up to lowest set bit, memory form: matches (load(src) + -1) ^ load(src),
        // with both loads naming the same memory operand, and emits one blsmskl.
        // Enabled by UseBMI1Instructions; costed at 125; flags are declared killed.
        // Unlike the and-based rules, the zero flag is declared as cleared.
 9198   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
 9199   predicate(UseBMI1Instructions);
 9200   effect(KILL cr);
 9201   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
 9202 
 9203   ins_cost(125);
 9204   format %{ "blsmskl $dst, $src" %}
 9205 
 9206   ins_encode %{
 9207     __ blsmskl($dst$$Register, $src$$Address);
 9208   %}
 9209   ins_pipe(ialu_reg_mem);
 9210 %}
 9211 
 9212 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
 9213 %{
        // Mask up to lowest set bit, register form: matches (src + -1) ^ src and
        // emits one blsmskl. Enabled by UseBMI1Instructions; flags are declared
        // killed, with the zero flag declared as cleared.
 9214   match(Set dst (XorI (AddI src minus_1) src));
 9215   predicate(UseBMI1Instructions);
 9216   effect(KILL cr);
 9217   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
 9218 
 9219   format %{ "blsmskl $dst, $src" %}
 9220 
 9221   ins_encode %{
 9222     __ blsmskl($dst$$Register, $src$$Register);
 9223   %}
 9224 
 9225   ins_pipe(ialu_reg);
 9226 %}
 9227 
 9228 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
 9229 %{
        // Reset lowest set bit, register form: matches (src + -1) & src and emits
        // one blsrl. Enabled by UseBMI1Instructions; flags are declared killed
        // with the listed flag effects.
 9230   match(Set dst (AndI (AddI src minus_1) src) );
 9231   predicate(UseBMI1Instructions);
 9232   effect(KILL cr);
 9233   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
 9234 
 9235   format %{ "blsrl  $dst, $src" %}
 9236 
 9237   ins_encode %{
 9238     __ blsrl($dst$$Register, $src$$Register);
 9239   %}
 9240 
        // Register source: use the register pipeline class, matching the other
        // register-form BMI1 rules (blsiI_rReg_rReg, blsmskI_rReg_rReg).
 9241   ins_pipe(ialu_reg);
 9242 %}
 9243 
 9244 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
 9245 %{
        // Reset lowest set bit, memory form: matches (load(src) + -1) & load(src),
        // with both loads naming the same memory operand, and emits one blsrl on
        // that address. Enabled by UseBMI1Instructions; costed at 125; flags killed.
 9246   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
 9247   predicate(UseBMI1Instructions);
 9248   effect(KILL cr);
 9249   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
 9250 
 9251   ins_cost(125);
 9252   format %{ "blsrl  $dst, $src" %}
 9253 
 9254   ins_encode %{
 9255     __ blsrl($dst$$Register, $src$$Address);
 9256   %}
 9257 
        // Memory source: use the register-from-memory pipeline class, matching the
        // other memory-form BMI1 rules (blsiI_rReg_mem, blsmskI_rReg_mem).
 9258   ins_pipe(ialu_reg_mem);
 9259 %}
 9260 
 9261 // Or Instructions
 9262 // Or Register with Register
      // In-place int OR: dst = dst | src. Flags are declared killed; flag(...)
      // lists the declared status-flag effects.
 9263 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9264 %{
 9265   match(Set dst (OrI dst src));
 9266   effect(KILL cr);
 9267   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9268 
 9269   format %{ "orl     $dst, $src\t# int" %}
 9270   ins_encode %{
 9271     __ orl($dst$$Register, $src$$Register);
 9272   %}
 9273   ins_pipe(ialu_reg_reg);
 9274 %}
 9275 
 9276 // Or Register with Immediate
      // In-place int OR with an arbitrary int constant: dst = dst | src.
      // Flags are declared killed with the listed flag effects.
 9277 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9278 %{
 9279   match(Set dst (OrI dst src));
 9280   effect(KILL cr);
 9281   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9282 
 9283   format %{ "orl     $dst, $src\t# int" %}
 9284   ins_encode %{
 9285     __ orl($dst$$Register, $src$$constant);
 9286   %}
 9287   ins_pipe(ialu_reg);
 9288 %}
 9289 
 9290 // Or Register with Memory
      // In-place int OR with a memory operand: folds the LoadI into orl.
      // Costed at 150; flags are declared killed with the listed flag effects.
 9291 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9292 %{
 9293   match(Set dst (OrI dst (LoadI src)));
 9294   effect(KILL cr);
 9295   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9296 
 9297   ins_cost(150);
 9298   format %{ "orl     $dst, $src\t# int" %}
 9299   ins_encode %{
 9300     __ orl($dst$$Register, $src$$Address);
 9301   %}
 9302   ins_pipe(ialu_reg_mem);
 9303 %}
 9304 
 9305 // Or Memory with Register
      // Byte read-modify-write: matches a StoreB to $dst of ((LoadB $dst) | src) and
      // emits a single orb on the memory operand. Costed at 150; flags are
      // declared killed with the listed flag effects.
 9306 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9307 %{
 9308   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
 9309   effect(KILL cr);
 9310   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9311 
 9312   ins_cost(150);
 9313   format %{ "orb    $dst, $src\t# byte" %}
 9314   ins_encode %{
 9315     __ orb($dst$$Address, $src$$Register);
 9316   %}
 9317   ins_pipe(ialu_mem_reg);
 9318 %}
 9319 
 9320 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9321 %{
        // Int read-modify-write: matches a StoreI to $dst of ((LoadI $dst) | src) and
        // emits a single orl on the memory operand. Costed at 150; flags are
        // declared killed with the listed flag effects.
 9322   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 9323   effect(KILL cr);
 9324   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9325 
 9326   ins_cost(150);
 9327   format %{ "orl     $dst, $src\t# int" %}
 9328   ins_encode %{
 9329     __ orl($dst$$Address, $src$$Register);
 9330   %}
 9331   ins_pipe(ialu_mem_reg);
 9332 %}
 9333 
 9334 // Or Memory with Immediate
      // Int read-modify-write with a constant: matches a StoreI to $dst of
      // ((LoadI $dst) | src). Costed at 125; flags are declared killed with the
      // listed flag effects.
 9335 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9336 %{
 9337   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 9338   effect(KILL cr);
 9339   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9340 
 9341   ins_cost(125);
 9342   format %{ "orl     $dst, $src\t# int" %}
 9343   ins_encode %{
 9344     __ orl($dst$$Address, $src$$constant);
 9345   %}
 9346   ins_pipe(ialu_mem_imm);
 9347 %}
 9348 
 9349 // Xor Instructions
 9350 // Xor Register with Register
      // In-place int XOR: dst = dst ^ src. Flags are declared killed; flag(...)
      // lists the declared status-flag effects.
 9351 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9352 %{
 9353   match(Set dst (XorI dst src));
 9354   effect(KILL cr);
 9355   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9356 
 9357   format %{ "xorl    $dst, $src\t# int" %}
 9358   ins_encode %{
 9359     __ xorl($dst$$Register, $src$$Register);
 9360   %}
 9361   ins_pipe(ialu_reg_reg);
 9362 %}
 9363 
 9364 // Xor Register with Immediate -1
      // dst ^ -1 is a bitwise complement, so it is emitted as notl. This rule
      // declares no rFlagsReg operand and no flag effects. The format string prints
      // "not" while the encoding calls notl.
 9365 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
 9366   match(Set dst (XorI dst imm));
 9367 
 9368   format %{ "not    $dst" %}
 9369   ins_encode %{
 9370      __ notl($dst$$Register);
 9371   %}
 9372   ins_pipe(ialu_reg);
 9373 %}
 9374 
 9375 // Xor Register with Immediate
      // In-place int XOR with an arbitrary int constant: dst = dst ^ src.
      // Flags are declared killed with the listed flag effects.
 9376 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9377 %{
 9378   match(Set dst (XorI dst src));
 9379   effect(KILL cr);
 9380   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9381 
 9382   format %{ "xorl    $dst, $src\t# int" %}
 9383   ins_encode %{
 9384     __ xorl($dst$$Register, $src$$constant);
 9385   %}
 9386   ins_pipe(ialu_reg);
 9387 %}
 9388 
 9389 // Xor Register with Memory
      // In-place int XOR with a memory operand: folds the LoadI into xorl.
      // Costed at 150; flags are declared killed with the listed flag effects.
 9390 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9391 %{
 9392   match(Set dst (XorI dst (LoadI src)));
 9393   effect(KILL cr);
 9394   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9395 
 9396   ins_cost(150);
 9397   format %{ "xorl    $dst, $src\t# int" %}
 9398   ins_encode %{
 9399     __ xorl($dst$$Register, $src$$Address);
 9400   %}
 9401   ins_pipe(ialu_reg_mem);
 9402 %}
 9403 
 9404 // Xor Memory with Register
      // Byte read-modify-write: matches a StoreB to $dst of ((LoadB $dst) ^ src) and
      // emits a single xorb on the memory operand. Costed at 150; flags are
      // declared killed with the listed flag effects.
 9405 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9406 %{
 9407   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
 9408   effect(KILL cr);
 9409   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9410 
 9411   ins_cost(150);
 9412   format %{ "xorb    $dst, $src\t# byte" %}
 9413   ins_encode %{
 9414     __ xorb($dst$$Address, $src$$Register);
 9415   %}
 9416   ins_pipe(ialu_mem_reg);
 9417 %}
 9418 
 9419 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9420 %{
        // Int read-modify-write: matches a StoreI to $dst of ((LoadI $dst) ^ src) and
        // emits a single xorl on the memory operand. Costed at 150; flags are
        // declared killed with the listed flag effects.
 9421   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 9422   effect(KILL cr);
 9423   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9424 
 9425   ins_cost(150);
 9426   format %{ "xorl    $dst, $src\t# int" %}
 9427   ins_encode %{
 9428     __ xorl($dst$$Address, $src$$Register);
 9429   %}
 9430   ins_pipe(ialu_mem_reg);
 9431 %}
 9432 
 9433 // Xor Memory with Immediate
      // Int read-modify-write with a constant: matches a StoreI to $dst of
      // ((LoadI $dst) ^ src). Costed at 125; flags are declared killed with the
      // listed flag effects.
 9434 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9435 %{
 9436   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 9437   effect(KILL cr);
 9438   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9439 
 9440   ins_cost(125);
 9441   format %{ "xorl    $dst, $src\t# int" %}
 9442   ins_encode %{
 9443     __ xorl($dst$$Address, $src$$constant);
 9444   %}
 9445   ins_pipe(ialu_mem_imm);
 9446 %}
 9447 
 9448 
 9449 // Long Logical Instructions
 9450 
 9451 // And Instructions
 9452 // And Register with Register
      // In-place long AND: dst = dst & src. Flags are declared killed; flag(...)
      // lists the declared status-flag effects.
 9453 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 9454 %{
 9455   match(Set dst (AndL dst src));
 9456   effect(KILL cr);
 9457   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9458 
 9459   format %{ "andq    $dst, $src\t# long" %}
 9460   ins_encode %{
 9461     __ andq($dst$$Register, $src$$Register);
 9462   %}
 9463   ins_pipe(ialu_reg_reg);
 9464 %}
 9465 
 9466 // And Register with Immediate 255
      // Long src & 0xFF is emitted as a zero-extending byte move instead of an AND,
      // so dst may differ from src and no flags operand is declared.
 9467 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
 9468 %{
 9469   match(Set dst (AndL src mask));
 9470 
 9471   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
 9472   ins_encode %{
 9473     // movzbl zeroes out the upper 32-bit and does not need REX.W
 9474     __ movzbl($dst$$Register, $src$$Register);
 9475   %}
 9476   ins_pipe(ialu_reg);
 9477 %}
 9478 
 9479 // And Register with Immediate 65535
      // Long src & 0xFFFF is emitted as a zero-extending 16-bit move instead of an
      // AND, so dst may differ from src and no flags operand is declared.
 9480 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
 9481 %{
 9482   match(Set dst (AndL src mask));
 9483 
 9484   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
 9485   ins_encode %{
 9486     // movzwl zeroes out the upper 32-bit and does not need REX.W
 9487     __ movzwl($dst$$Register, $src$$Register);
 9488   %}
 9489   ins_pipe(ialu_reg);
 9490 %}
 9491 
 9492 // And Register with Immediate
      // In-place long AND with a constant restricted to the immL32 operand class:
      // dst = dst & src. Flags are declared killed with the listed flag effects.
 9493 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 9494 %{
 9495   match(Set dst (AndL dst src));
 9496   effect(KILL cr);
 9497   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9498 
 9499   format %{ "andq    $dst, $src\t# long" %}
 9500   ins_encode %{
 9501     __ andq($dst$$Register, $src$$constant);
 9502   %}
 9503   ins_pipe(ialu_reg);
 9504 %}
 9505 
// And Register with Memory
// Matches (AndL dst (LoadL src)), folding the long load into the andq so the
// memory operand is used directly. Costed at 150; cr is declared KILLed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9520 
// And Memory with Register
// Matches a load / AndL / store sequence on the same memory operand dst and
// emits a single andq with a memory destination. Costed at 150; cr is
// declared KILLed.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9535 
// And Memory with Immediate
// Matches a load / AndL / store sequence on the same memory operand dst with
// an immL32 constant and emits a single andq with a memory destination and
// immediate source. Costed at 125; cr is declared KILLed.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 9550 
// Bit-reset in memory: matches the same load / AndL / store shape as
// andL_mem_imm, but for an immL_NotPow2 constant, and emits btrq on the
// memory operand. The bit index is log2i_exact of the complemented constant.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit immediate given that not(con) is a power of 2
  // because AND/OR works well enough for 8/32-bit values.
  // The predicate reads the constant through n->in(3)->in(2) and only accepts
  // it when the single set bit of ~con is at index 31 or above.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);

  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
 9567 
 9568 // BMI1 instructions
// And-not with a memory operand: matches (~src1) & (LoadL src2), where the
// complement is expressed as (XorL src1 minus_1), and emits a three-operand
// andnq. Only selected when UseBMI1Instructions is set. Costed at 125;
// cr is declared KILLed.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9583 
 9584 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
 9585   match(Set dst (AndL (XorL src1 minus_1) src2));
 9586   predicate(UseBMI1Instructions);
 9587   effect(KILL cr);
 9588   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
 9589 
 9590   format %{ "andnq  $dst, $src1, $src2" %}
 9591 
 9592   ins_encode %{
 9593   __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
 9594   %}
 9595   ins_pipe(ialu_reg_mem);
 9596 %}
 9597 
// Isolate lowest set bit, register form: matches (0 - src) & src and emits
// blsiq. Only selected when UseBMI1Instructions is set; cr is declared
// KILLed.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9611 
// Isolate lowest set bit, memory form: matches (0 - LoadL src) & (LoadL src)
// on the same memory operand and emits blsiq with a memory source.
// Only selected when UseBMI1Instructions is set. Costed at 125; cr is
// declared KILLed.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9626 
// Mask up to lowest set bit, memory form: matches
// ((LoadL src) + (-1)) ^ (LoadL src) on the same memory operand and emits
// blsmskq with a memory source. Only selected when UseBMI1Instructions is
// set. Costed at 125; cr is declared KILLed.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9642 
// Mask up to lowest set bit, register form: matches (src + (-1)) ^ src and
// emits blsmskq. Only selected when UseBMI1Instructions is set; cr is
// declared KILLed.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
 9658 
// Reset lowest set bit, register form: matches (src + (-1)) & src and emits
// blsrq. Only selected when UseBMI1Instructions is set; cr is declared
// KILLed.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
 9674 
 9675 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
 9676 %{
 9677   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
 9678   predicate(UseBMI1Instructions);
 9679   effect(KILL cr);
 9680   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
 9681 
 9682   ins_cost(125);
 9683   format %{ "blsrq  $dst, $src" %}
 9684 
 9685   ins_encode %{
 9686     __ blsrq($dst$$Register, $src$$Address);
 9687   %}
 9688 
 9689   ins_pipe(ialu_reg);
 9690 %}
 9691 
 9692 // Or Instructions
// Or Register with Register
// Matches (OrL dst src) with both operands in long registers and emits a
// 64-bit orq that overwrites dst. cr is declared KILLed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9706 
// Or Register with a pointer reinterpreted as a long: matches
// (OrL dst (CastP2X src)) and emits orq directly on the pointer register, so
// the cast itself produces no code.
// Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9719 
 9720 
// Or Register with Immediate
// Matches (OrL dst src) where src is an immL32 constant operand and emits
// orq with the constant as an immediate. cr is declared KILLed.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9734 
// Or Register with Memory
// Matches (OrL dst (LoadL src)), folding the long load into the orq.
// Costed at 150; cr is declared KILLed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9749 
// Or Memory with Register
// Matches a load / OrL / store sequence on the same memory operand dst and
// emits a single orq with a memory destination. Costed at 150; cr is
// declared KILLed.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9764 
// Or Memory with Immediate
// Matches a load / OrL / store sequence on the same memory operand dst with
// an immL32 constant and emits a single orq with a memory destination and
// immediate source. Costed at 125; cr is declared KILLed.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 9779 
// Bit-set in memory: matches the same load / OrL / store shape as
// orL_mem_imm, but for an immL_Pow2 constant, and emits btsq on the memory
// operand. The bit index is log2i_exact of the constant.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit power of 2 immediate
  // because AND/OR works well enough for 8/32-bit values.
  // The predicate reads the constant through n->in(3)->in(2) and only accepts
  // it when its single set bit is at index 32 or above.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);

  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
 9796 
 9797 // Xor Instructions
// Xor Register with Register
// Matches (XorL dst src) with both operands in long registers and emits a
// 64-bit xorq that overwrites dst. cr is declared KILLed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
 9811 
// Xor Register with Immediate -1
// Matches (XorL dst imm) where imm is an immL_M1 operand and emits notq on
// dst instead of an xor. This rule declares no flags operand.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}
  ins_encode %{
     __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9822 
// Xor Register with Immediate
// Matches (XorL dst src) where src is an immL32 constant operand and emits
// xorq with the constant as an immediate. cr is declared KILLed.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 9836 
// Xor Register with Memory
// Matches (XorL dst (LoadL src)), folding the long load into the xorq.
// Costed at 150; cr is declared KILLed.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 9851 
// Xor Memory with Register
// Matches a load / XorL / store sequence on the same memory operand dst and
// emits a single xorq with a memory destination. Costed at 150; cr is
// declared KILLed.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
 9866 
// Xor Memory with Immediate
// Matches a load / XorL / store sequence on the same memory operand dst with
// an immL32 constant and emits a single xorq with a memory destination and
// immediate source. Costed at 125; cr is declared KILLed.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 9881 
 9882 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
 9883 %{
 9884   match(Set dst (CmpLTMask p q));
 9885   effect(KILL cr);
 9886 
 9887   ins_cost(400);
 9888   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
 9889             "setlt   $dst\n\t"
 9890             "movzbl  $dst, $dst\n\t"
 9891             "negl    $dst" %}
 9892   ins_encode %{
 9893     __ cmpl($p$$Register, $q$$Register);
 9894     __ setb(Assembler::less, $dst$$Register);
 9895     __ movzbl($dst$$Register, $dst$$Register);
 9896     __ negl($dst$$Register);
 9897   %}
 9898   ins_pipe(pipe_slow);
 9899 %}
 9900 
// CmpLTMask against zero: matches (CmpLTMask dst zero) with an immI_0
// operand and emits a single arithmetic right shift by 31, which replicates
// the sign bit of dst across the whole register. Costed at 100, well below
// the general cmpLTMask rule; cr is declared KILLed.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}
 9913 
 9914 /* Better to save a register than avoid a branch */
 9915 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
 9916 %{
 9917   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 9918   effect(KILL cr);
 9919   ins_cost(300);
 9920   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
 9921             "jge     done\n\t"
 9922             "addl    $p,$y\n"
 9923             "done:   " %}
 9924   ins_encode %{
 9925     Register Rp = $p$$Register;
 9926     Register Rq = $q$$Register;
 9927     Register Ry = $y$$Register;
 9928     Label done;
 9929     __ subl(Rp, Rq);
 9930     __ jccb(Assembler::greaterEqual, done);
 9931     __ addl(Rp, Ry);
 9932     __ bind(done);
 9933   %}
 9934   ins_pipe(pipe_cmplt);
 9935 %}
 9936 
 9937 /* Better to save a register than avoid a branch */
 9938 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
 9939 %{
 9940   match(Set y (AndI (CmpLTMask p q) y));
 9941   effect(KILL cr);
 9942 
 9943   ins_cost(300);
 9944 
 9945   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
 9946             "jlt     done\n\t"
 9947             "xorl    $y, $y\n"
 9948             "done:   " %}
 9949   ins_encode %{
 9950     Register Rp = $p$$Register;
 9951     Register Rq = $q$$Register;
 9952     Register Ry = $y$$Register;
 9953     Label done;
 9954     __ cmpl(Rp, Rq);
 9955     __ jccb(Assembler::less, done);
 9956     __ xorl(Ry, Ry);
 9957     __ bind(done);
 9958   %}
 9959   ins_pipe(pipe_cmplt);
 9960 %}
 9961 
 9962 
 9963 //---------- FP Instructions------------------------------------------------
 9964 
// Really expensive, avoid
// Float compare into an rFlagsRegU flags operand: emits ucomiss followed by
// the code generated by emit_cmpfp_fixup (defined elsewhere in this file).
// The format string shows that fixup as a jnp over a pushfq / andq / popfq
// sequence that rewrites the flags for the NaN case. Costed at 500.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
 9983 
// Float compare into an rFlagsRegUCF flags operand: a bare ucomiss with no
// NaN fixup afterwards. Costed at 100, versus 500 for cmpF_cc_reg.
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
 9994 
// Float compare against memory into an rFlagsRegUCF flags operand: folds
// the LoadF of src2 into ucomiss; no NaN fixup is emitted. Costed at 100.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
10005 
// Float compare against a constant into an rFlagsRegUCF flags operand: the
// immF constant is addressed through $constantaddress (the constant table)
// and used as the memory operand of ucomiss. Costed at 100.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
  match(Set cr (CmpF src con));
  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10015 
// Really expensive, avoid
// Double compare into an rFlagsRegU flags operand: emits ucomisd followed by
// the code generated by emit_cmpfp_fixup (defined elsewhere in this file).
// The format string shows that fixup as a jnp over a pushfq / andq / popfq
// sequence that rewrites the flags for the NaN case. Costed at 500.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}
10034 
// Double compare into an rFlagsRegUCF flags operand: a bare ucomisd with no
// NaN fixup afterwards. Costed at 100, versus 500 for cmpD_cc_reg.
// The format string prints only the instruction and its operands, matching
// the float counterpart cmpF_cc_reg_CF.
instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10045 
// Double compare against memory into an rFlagsRegUCF flags operand: folds
// the LoadD of src2 into ucomisd; no NaN fixup is emitted. Costed at 100.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
10056 
// Double compare against a constant into an rFlagsRegUCF flags operand: the
// immD constant is addressed through $constantaddress (the constant table)
// and used as the memory operand of ucomisd. Costed at 100.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
10066 
// Compare into -1,0,1
// Three-way float compare (CmpF3) of two registers into an int register:
// emits ucomiss, then the code generated by emit_cmpfp3 (defined elsewhere
// in this file). The format string shows that sequence: dst is preset to
// -1 and kept on the parity or below conditions, otherwise it is replaced
// by the zero-extended "not equal" bit. Costed at 275; cr is declared KILLed.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10087 
// Compare into -1,0,1
// Three-way float compare (CmpF3) of a register against memory: folds the
// LoadF of src2 into ucomiss, then emits the emit_cmpfp3 sequence (defined
// elsewhere in this file) that turns the flags into the int result in dst.
// Costed at 275; cr is declared KILLed.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10108 
// Compare into -1,0,1
// Three-way float compare (CmpF3) of a register against an immF constant:
// the constant is addressed through $constantaddress (the constant table),
// then the emit_cmpfp3 sequence (defined elsewhere in this file) turns the
// flags into the int result in dst. Costed at 275; cr is declared KILLed.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10128 
// Compare into -1,0,1
// Three-way double compare (CmpD3) of two registers into an int register:
// emits ucomisd, then the code generated by emit_cmpfp3 (defined elsewhere
// in this file). The format string shows that sequence: dst is preset to
// -1 and kept on the parity or below conditions, otherwise it is replaced
// by the zero-extended "not equal" bit. Costed at 275; cr is declared KILLed.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10149 
// Compare into -1,0,1
// Three-way double compare (CmpD3) of a register against memory: folds the
// LoadD of src2 into ucomisd, then emits the emit_cmpfp3 sequence (defined
// elsewhere in this file) that turns the flags into the int result in dst.
// Costed at 275; cr is declared KILLed.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10170 
// Compare into -1,0,1
// Three-way double compare (CmpD3) of a register against an immD constant:
// the constant is addressed through $constantaddress (the constant table),
// then the emit_cmpfp3 sequence (defined elsewhere in this file) turns the
// flags into the int result in dst. Costed at 275; cr is declared KILLed.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10190 
10191 //----------Arithmetic Conversion Instructions---------------------------------
10192 
// Float to double conversion, register to register, via cvtss2sd.
// dst is additionally declared as a TEMP in the effect list.
instruct convF2D_reg_reg(regD dst, regF src)
%{
  match(Set dst (ConvF2D src));
  effect(TEMP dst);
  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10203 
// Float to double conversion with the float loaded from memory: folds the
// LoadF into cvtss2sd's memory operand.
instruct convF2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10214 
// Double to float conversion, register to register, via cvtsd2ss.
// dst is additionally declared as a TEMP in the effect list.
instruct convD2F_reg_reg(regF dst, regD src)
%{
  match(Set dst (ConvD2F src));
  effect(TEMP dst);
  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10225 
// Double to float conversion with the double loaded from memory: folds the
// LoadD into cvtsd2ss's memory operand.
instruct convD2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10236 
// XXX do mem variants
// Float to int conversion: delegates to the macro assembler's convertF2I
// helper with (T_INT, T_FLOAT); the format string is a pseudo-mnemonic, not
// a real instruction. cr is declared KILLed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
%{
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10248 
// Float to long conversion: delegates to the macro assembler's convertF2I
// helper with (T_LONG, T_FLOAT); the format string is a pseudo-mnemonic.
// cr is declared KILLed.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10259 
// Double to int conversion: delegates to the macro assembler's convertF2I
// helper with (T_INT, T_DOUBLE); the format string is a pseudo-mnemonic.
// cr is declared KILLed.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10270 
// Double to long conversion: delegates to the macro assembler's convertF2I
// helper with (T_LONG, T_DOUBLE); the format string is a pseudo-mnemonic.
// cr is declared KILLed.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10281 
// RoundD (double to long): delegates to the macro assembler's round_double
// helper. dst, rtmp and rcx are all declared TEMP, with rcx constrained to
// the rcx_RegL operand class; cr is declared KILLed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10292 
// RoundF (float to int): delegates to the macro assembler's round_float
// helper. dst, rtmp and rcx are all declared TEMP, with rcx constrained to
// the rcx_RegL operand class; cr is declared KILLed. Unlike round_double_reg,
// the format string does not list the temporaries.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10303 
// Int to float conversion from a general register via cvtsi2ssl.
// Selected only when UseXmmI2F is off; convXI2F_reg handles the same
// ConvI2F pattern when the flag is on.
instruct convI2F_reg_reg(regF dst, rRegI src)
%{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10315 
// Int to float conversion with the int loaded from memory: folds the LoadI
// into cvtsi2ssl's memory operand. This rule has no predicate.
instruct convI2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10326 
// Int to double conversion from a general register via cvtsi2sdl.
// Selected only when UseXmmI2D is off; convXI2D_reg handles the same
// ConvI2D pattern when the flag is on.
instruct convI2D_reg_reg(regD dst, rRegI src)
%{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10338 
// Int to double conversion with the int loaded from memory: folds the LoadI
// into cvtsi2sdl's memory operand. This rule has no predicate.
instruct convI2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10349 
// Int to float conversion done inside the XMM register: the int is first
// copied into dst with movdl and then converted in place with cvtdq2ps.
// Selected only when UseXmmI2F is on; convI2F_reg_reg covers the other case.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    __ movdl(xdst, $src$$Register);   // move the int bits into the XMM register
    __ cvtdq2ps(xdst, xdst);          // convert in place
  %}
  ins_pipe(pipe_slow); // XXX
%}
10363 
// Int to double conversion done inside the XMM register: the int is first
// copied into dst with movdl and then converted in place with cvtdq2pd.
// Selected only when UseXmmI2D is on; convI2D_reg_reg covers the other case.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    __ movdl(xdst, $src$$Register);   // move the int bits into the XMM register
    __ cvtdq2pd(xdst, xdst);          // convert in place
  %}
  ins_pipe(pipe_slow); // XXX
%}
10377 
// Long to float conversion from a general register via cvtsi2ssq.
instruct convL2F_reg_reg(regF dst, rRegL src)
%{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10388 
// Long to float conversion with the long loaded from memory: folds the
// LoadL into cvtsi2ssq's memory operand.
instruct convL2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10399 
// Long to double conversion from a general register via cvtsi2sdq.
instruct convL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10410 
// Long to double conversion with the long loaded from memory: folds the
// LoadL into cvtsi2sdq's memory operand.
instruct convL2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10421 
// Int to long conversion (sign-extending) via movslq. Costed at 125.
instruct convI2L_reg_reg(rRegL dst, rRegI src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    __ movslq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10433 
// Zero-extend convert int to long
// Matches (ConvI2L src) & mask with an immL_32bits mask and replaces the
// sign-extend-then-mask pair with a single 32-bit movl.
// The format string describes one instruction, so it carries no trailing
// line separator.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    // No instruction is emitted when both operands were assigned the same
    // register.
    // NOTE(review): that case relies on the upper 32 bits of the register
    // already being zero - confirm against the int register convention.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
10447 
// Zero-extend convert int to long
// Matches (ConvI2L (LoadI src)) & mask with an immL_32bits mask and replaces
// the load / sign-extend / mask chain with a single 32-bit movl from memory.
// The format string describes one instruction, so it carries no trailing
// line separator.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10459 
// Zero-extend the low 32 bits of a long: matches (AndL src mask) with an
// immL_32bits mask and emits a 32-bit movl instead of an and. The move is
// emitted unconditionally, and no flags operand is declared.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10470 
// Long to int conversion: emits a 32-bit movl from the long register into
// the int register.
instruct convL2I_reg_reg(rRegI dst, rRegL src)
%{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10481 
10482 
// MoveF2I with the float source in a stack slot: the slot is read with a
// 32-bit integer load relative to rsp.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
10494 
// MoveI2F with the int source in a stack slot: the slot is loaded into an
// XMM register through movflt, addressed relative to rsp.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
10506 
// MoveD2L with the double source in a stack slot: the slot is read with a
// 64-bit integer load relative to rsp.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
10518 
// MoveL2D from a stack slot, selected when UseXmmLoadAndClearUpper is off.
// The encoding goes through movdbl, exactly like MoveL2D_stack_reg; only
// the predicate and the printed mnemonic differ.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
10531 
// MoveL2D from a stack slot, selected when UseXmmLoadAndClearUpper is on.
// The slot is loaded into an XMM register through movdbl.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
10544 
10545 
// MoveF2I with the result in a stack slot: the XMM source is stored to
// the slot (addressed relative to rsp) through movflt.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(95); // XXX

  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10557 
// MoveI2F with the result in a stack slot: the integer register is stored
// to the slot (addressed relative to rsp) with a 32-bit move.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);

  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
10569 
// MoveD2L with the result in a stack slot: the XMM source is stored to
// the slot (addressed relative to rsp) through movdbl.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The trailing label names this instruct; it previously carried the
  // label of the MoveL2D sibling.
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10581 
// MoveL2D with the result in a stack slot: the long register is stored to
// the slot (addressed relative to rsp) with a 64-bit move.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);

  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10593 
// MoveF2I between registers: XMM source to general-purpose destination
// via movdl.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);

  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    const Register    gpr = $dst$$Register;
    const XMMRegister xmm = $src$$XMMRegister;
    __ movdl(gpr, xmm);
  %}
  ins_pipe( pipe_slow );
%}
10604 
// MoveD2L between registers: XMM source to general-purpose destination
// via movdq.
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);

  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    const Register    gpr = $dst$$Register;
    const XMMRegister xmm = $src$$XMMRegister;
    __ movdq(gpr, xmm);
  %}
  ins_pipe( pipe_slow );
%}
10615 
// MoveI2F between registers: general-purpose source to XMM destination
// via movdl.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);

  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    const XMMRegister xmm = $dst$$XMMRegister;
    const Register    gpr = $src$$Register;
    __ movdl(xmm, gpr);
  %}
  ins_pipe( pipe_slow );
%}
10626 
// MoveL2D between registers: general-purpose source to XMM destination
// via movdq.
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);

  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    const XMMRegister xmm = $dst$$XMMRegister;
    const Register    gpr = $src$$Register;
    __ movdq(xmm, gpr);
  %}
  ins_pipe( pipe_slow );
%}
10637 
10638 
// Fast clearing of an array.
// Small ClearArray, non-AVX512 (UseAVX <= 2), for nodes that are neither
// is_large() nor word_copy_only(). cnt, base and val are pinned to
// rcx, rdi and rax and are all consumed by the instruction.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq   $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    val_reg  = $val$$Register;
    const XMMRegister vec_tmp  = $tmp$$XMMRegister;
    // The two flags follow the predicate: not large, not word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, vec_tmp, false, false);
  %}
  ins_pipe(pipe_slow);
%}
10699 
// Small ClearArray, non-AVX512 (UseAVX <= 2), for nodes flagged
// word_copy_only(). Same operands as rep_stos; the format has no
// byte-store (stosb) arm.
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                            Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
      $$emit$$"movdq   $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    val_reg  = $val$$Register;
    const XMMRegister vec_tmp  = $tmp$$XMMRegister;
    // The two flags follow the predicate: not large, word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, vec_tmp, false, true);
  %}
  ins_pipe(pipe_slow);
%}
10755 
// Small ClearArray AVX512 non-constant length.
// Chosen when UseAVX > 2 for nodes that are neither is_large() nor
// word_copy_only(); needs an opmask temp (ktmp) besides the vector temp.
// NOTE(review): the format text prints "xorq rax, rax" and a vpxor-zeroed
// ymm0, while the encoding hands the val operand (in rax) to clear_mem and
// the non-EVEX variants print no xorq -- the listing looks stale; confirm
// against clear_mem before relying on it.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                       Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
  ins_cost(125);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov     rdi,rax\n\t"
      $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    val_reg  = $val$$Register;
    const XMMRegister vec_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    // The two flags follow the predicate: not large, not word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, vec_tmp, false, false, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
10816 
// Small ClearArray, AVX512 (UseAVX > 2), non-constant length, for nodes
// flagged word_copy_only().
// NOTE(review): the format text prints "xorq rax, rax" although val (in
// rax) is an input handed to clear_mem, and it keeps a UseFastStosb arm
// that the non-EVEX word_copy formats omit -- the listing looks stale;
// confirm against clear_mem before relying on it.
instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                 Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
  ins_cost(125);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov     rdi,rax\n\t"
      $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    val_reg  = $val$$Register;
    const XMMRegister vec_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    // The two flags follow the predicate: not large, word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, vec_tmp, false, true, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
10876 
// Large ClearArray non-AVX512.
// Chosen when UseAVX <= 2 for nodes that are is_large() and not
// word_copy_only(); the format has no short-length prologue.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"movdq   $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    val_reg  = $val$$Register;
    const XMMRegister vec_tmp  = $tmp$$XMMRegister;
    // The two flags follow the predicate: large, not word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, vec_tmp, true, false);
  %}
  ins_pipe(pipe_slow);
%}
10926 
// Large ClearArray, non-AVX512 (UseAVX <= 2), for nodes flagged
// word_copy_only(). The format has no byte-store (stosb) arm.
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                                  Universe dummy, rFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
      $$emit$$"movdq   $tmp, $val\n\t"
      $$emit$$"punpcklqdq $tmp, $tmp\n\t"
      $$emit$$"vinserti128_high $tmp, $tmp\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu $tmp,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    val_reg  = $val$$Register;
    const XMMRegister vec_tmp  = $tmp$$XMMRegister;
    // The two flags follow the predicate: large, word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, vec_tmp, true, true);
  %}
  ins_pipe(pipe_slow);
%}
10972 
// Large ClearArray AVX512.
// Chosen when UseAVX > 2 for nodes that are is_large() and not
// word_copy_only(); needs an opmask temp (ktmp) besides the vector temp.
// NOTE(review): the format text prints "xorq rax, rax" and a vpxor-zeroed
// ymm0, while the encoding hands the val operand (in rax) to clear_mem and
// the non-EVEX variants print no xorq -- the listing looks stale; confirm
// against clear_mem before relying on it.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                             Universe dummy, rFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    val_reg  = $val$$Register;
    const XMMRegister vec_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    // The two flags follow the predicate: large, not word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, vec_tmp, true, false, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
11023 
// Large ClearArray, AVX512 (UseAVX > 2), for nodes flagged
// word_copy_only().
// NOTE(review): the format text prints "xorq rax, rax" although val (in
// rax) is an input handed to clear_mem, and it keeps a UseFastStosb arm
// that the non-EVEX word_copy formats omit -- the listing looks stale;
// confirm against clear_mem before relying on it.
instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
                                       Universe dummy, rFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    val_reg  = $val$$Register;
    const XMMRegister vec_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    // The two flags follow the predicate: large, word-copy-only.
    __ clear_mem(base_reg, cnt_reg, val_reg, vec_tmp, true, true, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
11073 
// Small ClearArray AVX512 constant length.
// The element count is an immediate (immL), so base may live in any
// pointer register; val stays pinned to rax and is consumed. Selected only
// when UseAVX > 2 and VM_Version::supports_avx512vlbw() holds, for nodes
// that are neither is_large() nor word_copy_only().
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray (Binary cnt base) val));
  ins_cost(100);
  effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
  // Single-line listing: no trailing line continuation or padding.
  format %{ "clear_mem_imm $base, $cnt" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11088 
// StrComp for the LL encoding, used when
// VM_Version::supports_avx512vlbw() is false (no opmask register: knoreg).
// Both string pointers and both counts are consumed by the instruction.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    first_str  = $str1$$Register;
    const Register    second_str = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    __ string_compare(first_str, second_str, first_len, second_len,
                      $result$$Register, vec_tmp, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11104 
// StrComp for the LL encoding, used when
// VM_Version::supports_avx512vlbw() is true; an opmask temp (ktmp) is
// passed to string_compare in place of knoreg.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    first_str  = $str1$$Register;
    const Register    second_str = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    const KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first_str, second_str, first_len, second_len,
                      $result$$Register, vec_tmp, StrIntrinsicNode::LL, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11120 
// StrComp for the UU encoding, used when
// VM_Version::supports_avx512vlbw() is false (no opmask register: knoreg).
// Both string pointers and both counts are consumed by the instruction.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    first_str  = $str1$$Register;
    const Register    second_str = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    __ string_compare(first_str, second_str, first_len, second_len,
                      $result$$Register, vec_tmp, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11136 
// StrComp for the UU encoding, used when
// VM_Version::supports_avx512vlbw() is true; an opmask temp (ktmp) is
// passed to string_compare in place of knoreg.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    first_str  = $str1$$Register;
    const Register    second_str = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    const KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first_str, second_str, first_len, second_len,
                      $result$$Register, vec_tmp, StrIntrinsicNode::UU, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11152 
// StrComp for the LU encoding, used when
// VM_Version::supports_avx512vlbw() is false (no opmask register: knoreg).
// Both string pointers and both counts are consumed by the instruction.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    first_str  = $str1$$Register;
    const Register    second_str = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    __ string_compare(first_str, second_str, first_len, second_len,
                      $result$$Register, vec_tmp, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11168 
// StrComp for the LU encoding, used when
// VM_Version::supports_avx512vlbw() is true; an opmask temp (ktmp) is
// passed to string_compare in place of knoreg.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    first_str  = $str1$$Register;
    const Register    second_str = $str2$$Register;
    const Register    first_len  = $cnt1$$Register;
    const Register    second_len = $cnt2$$Register;
    const XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    const KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first_str, second_str, first_len, second_len,
                      $result$$Register, vec_tmp, StrIntrinsicNode::LU, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11184 
// StrComp for the UL encoding, used when
// VM_Version::supports_avx512vlbw() is false (no opmask register: knoreg).
// Compared with the other variants the operand registers are swapped
// (str1/cnt1 in rsi/rdx, str2/cnt2 in rdi/rcx) and the second string is
// handed to string_compare first.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    lead_str  = $str2$$Register;
    const Register    trail_str = $str1$$Register;
    const Register    lead_len  = $cnt2$$Register;
    const Register    trail_len = $cnt1$$Register;
    const XMMRegister vec_tmp   = $tmp1$$XMMRegister;
    __ string_compare(lead_str, trail_str, lead_len, trail_len,
                      $result$$Register, vec_tmp, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11200 
// StrComp for the UL encoding, used when
// VM_Version::supports_avx512vlbw() is true; an opmask temp (ktmp) is
// passed to string_compare in place of knoreg. As in string_compareUL the
// operand registers are swapped and the second string is passed first.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    lead_str  = $str2$$Register;
    const Register    trail_str = $str1$$Register;
    const Register    lead_len  = $cnt2$$Register;
    const Register    trail_len = $cnt1$$Register;
    const XMMRegister vec_tmp   = $tmp1$$XMMRegister;
    const KRegister   mask_tmp  = $ktmp$$KRegister;
    __ string_compare(lead_str, trail_str, lead_len, trail_len,
                      $result$$Register, vec_tmp, StrIntrinsicNode::UL, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
11216 
// Fast search of a substring with known size, LL encoding.
// Requires UseSSE42Intrinsics. The substring length is the immediate
// int_cnt2; cnt2 (rax) and tmp (rcx) are scratch registers that get killed.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int         needle_len   = (int)$int_cnt2$$constant;
    const Register    haystack     = $str1$$Register;
    const Register    needle       = $str2$$Register;
    const Register    haystack_len = $cnt1$$Register;
    const Register    len_scratch  = $cnt2$$Register;
    const Register    found_at     = $result$$Register;
    const XMMRegister vec_tmp      = $tmp_vec$$XMMRegister;
    const Register    gpr_tmp      = $tmp$$Register;

    if (needle_len < 16) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(haystack, needle, haystack_len, len_scratch,
                        needle_len, found_at, vec_tmp, gpr_tmp, StrIntrinsicNode::LL);
    } else {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8(haystack, needle, haystack_len, len_scratch,
                          needle_len, found_at, vec_tmp, gpr_tmp, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11245 
11246 // fast search of substring with known size.
11247 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11248                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11249 %{
        // StrIndexOf rule for StrIntrinsicNode::UU where the substring length is
        // a compile-time constant (immI int_cnt2). Requires UseSSE42Intrinsics.
        // cnt2 and tmp are extra KILLed registers handed to the macro assembler;
        // str1, str2 and cnt1 are USE_KILL inputs.
        // The stub switch happens at 8 elements here (16 in the LL rule).
11250   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11251   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11252   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11253 
11254   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
11255   ins_encode %{
11256     int icnt2 = (int)$int_cnt2$$constant;
11257     if (icnt2 >= 8) {
11258       // IndexOf for constant substrings with size >= 8 elements
11259       // which don't need to be loaded through stack.
11260       __ string_indexofC8($str1$$Register, $str2$$Register,
11261                           $cnt1$$Register, $cnt2$$Register,
11262                           icnt2, $result$$Register,
11263                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11264     } else {
11265       // Small strings are loaded through stack if they cross page boundary.
11266       __ string_indexof($str1$$Register, $str2$$Register,
11267                         $cnt1$$Register, $cnt2$$Register,
11268                         icnt2, $result$$Register,
11269                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11270     }
11271   %}
11272   ins_pipe( pipe_slow );
11273 %}
11274 
11275 // fast search of substring with known size.
11276 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
11277                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
11278 %{
        // StrIndexOf rule for StrIntrinsicNode::UL where the substring length is
        // a compile-time constant (immI int_cnt2). Requires UseSSE42Intrinsics.
        // cnt2 and tmp are extra KILLed registers handed to the macro assembler;
        // str1, str2 and cnt1 are USE_KILL inputs.
        // Uses the same 8-element threshold as the UU rule.
11279   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11280   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11281   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11282 
11283   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
11284   ins_encode %{
11285     int icnt2 = (int)$int_cnt2$$constant;
11286     if (icnt2 >= 8) {
11287       // IndexOf for constant substrings with size >= 8 elements
11288       // which don't need to be loaded through stack.
11289       __ string_indexofC8($str1$$Register, $str2$$Register,
11290                           $cnt1$$Register, $cnt2$$Register,
11291                           icnt2, $result$$Register,
11292                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11293     } else {
11294       // Small strings are loaded through stack if they cross page boundary.
11295       __ string_indexof($str1$$Register, $str2$$Register,
11296                         $cnt1$$Register, $cnt2$$Register,
11297                         icnt2, $result$$Register,
11298                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11299     }
11300   %}
11301   ins_pipe( pipe_slow );
11302 %}
11303 
11304 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11305                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
11306 %{
        // StrIndexOf rule for StrIntrinsicNode::LL where the substring length is
        // a register operand (cnt2) rather than a constant. Requires
        // UseSSE42Intrinsics.
        // All four inputs are USE_KILL; tmp_vec is a TEMP, tmp and flags are KILL.
        // string_indexof() receives -1 in the argument position where the
        // constant-length rules pass the known length.
11307   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11308   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11309   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11310 
11311   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11312   ins_encode %{
11313     __ string_indexof($str1$$Register, $str2$$Register,
11314                       $cnt1$$Register, $cnt2$$Register,
11315                       (-1), $result$$Register,
11316                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11317   %}
11318   ins_pipe( pipe_slow );
11319 %}
11320 
11321 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11322                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
11323 %{
        // StrIndexOf rule for StrIntrinsicNode::UU where the substring length is
        // a register operand (cnt2). Requires UseSSE42Intrinsics.
        // All four inputs are USE_KILL; tmp_vec is a TEMP, tmp and flags are KILL.
        // string_indexof() receives -1 in the argument position where the
        // constant-length rules pass the known length.
11324   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11325   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11326   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11327 
11328   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11329   ins_encode %{
11330     __ string_indexof($str1$$Register, $str2$$Register,
11331                       $cnt1$$Register, $cnt2$$Register,
11332                       (-1), $result$$Register,
11333                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11334   %}
11335   ins_pipe( pipe_slow );
11336 %}
11337 
11338 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
11339                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
11340 %{
        // StrIndexOf rule for StrIntrinsicNode::UL where the substring length is
        // a register operand (cnt2). Requires UseSSE42Intrinsics.
        // All four inputs are USE_KILL; tmp_vec is a TEMP, tmp and flags are KILL.
        // string_indexof() receives -1 in the argument position where the
        // constant-length rules pass the known length.
11341   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11342   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11343   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11344 
11345   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11346   ins_encode %{
11347     __ string_indexof($str1$$Register, $str2$$Register,
11348                       $cnt1$$Register, $cnt2$$Register,
11349                       (-1), $result$$Register,
11350                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11351   %}
11352   ins_pipe( pipe_slow );
11353 %}
11354 
11355 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
11356                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
11357 %{
        // StrIndexOfChar rule for StrIntrinsicNode::U (the format labels it
        // StringUTF16). Requires UseSSE42Intrinsics.
        // str1, cnt1 and ch are USE_KILL inputs; three vector registers and tmp
        // are TEMPs; flags are killed. Delegates to string_indexof_char().
11358   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
11359   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11360   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11361   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11362   ins_encode %{
11363     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11364                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
11365   %}
11366   ins_pipe( pipe_slow );
11367 %}
11368 
11369 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
11370                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
11371 %{
        // StrIndexOfChar rule for StrIntrinsicNode::L (the format labels it
        // StringLatin1). Requires UseSSE42Intrinsics.
        // Same operands and effects as string_indexof_char, but delegates to
        // stringL_indexof_char().
11372   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
11373   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11374   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11375   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11376   ins_encode %{
11377     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11378                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
11379   %}
11380   ins_pipe( pipe_slow );
11381 %}
11382 
11383 // fast string equals
11384 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11385                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11386 %{
        // StrEquals rule used when VM_Version::supports_avx512vlbw() is false.
        // str1, str2 and cnt are USE_KILL inputs; tmp1/tmp2 are vector TEMPs;
        // tmp3 and the flags are KILLed.
        // Delegates to arrays_equals() with first argument false (the AryEq
        // rules pass true), the explicit count register, the "char" flag false,
        // and knoreg in the opmask-register slot.
11387   predicate(!VM_Version::supports_avx512vlbw());
11388   match(Set result (StrEquals (Binary str1 str2) cnt));
11389   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11390 
11391   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11392   ins_encode %{
11393     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11394                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11395                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11396   %}
11397   ins_pipe( pipe_slow );
11398 %}
11399 
11400 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
11401                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
11402 %{
        // StrEquals rule used when VM_Version::supports_avx512vlbw() is true.
        // Same as string_equals, except an opmask temp (ktmp) is reserved and
        // passed to arrays_equals() instead of knoreg.
        // The format string does not mention ktmp although it is a TEMP.
11403   predicate(VM_Version::supports_avx512vlbw());
11404   match(Set result (StrEquals (Binary str1 str2) cnt));
11405   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11406 
11407   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11408   ins_encode %{
11409     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11410                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11411                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11412   %}
11413   ins_pipe( pipe_slow );
11414 %}
11415 
11416 // fast array equals
11417 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11418                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11419 %{
        // AryEq rule for StrIntrinsicNode::LL (byte[] per the format string),
        // used when VM_Version::supports_avx512vlbw() is false.
        // There is no count operand: tmp3 is passed in the argument position
        // where the StrEquals rules pass cnt, and is declared KILL with tmp4.
        // Calls arrays_equals() with first argument true, "char" flag false,
        // and knoreg in the opmask-register slot.
11420   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11421   match(Set result (AryEq ary1 ary2));
11422   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11423 
11424   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11425   ins_encode %{
11426     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11427                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11428                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11429   %}
11430   ins_pipe( pipe_slow );
11431 %}
11432 
11433 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11434                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11435 %{
        // AryEq rule for StrIntrinsicNode::LL, used when
        // VM_Version::supports_avx512vlbw() is true.
        // Same as array_equalsB, except an opmask temp (ktmp) is reserved and
        // passed to arrays_equals() instead of knoreg.
11436   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11437   match(Set result (AryEq ary1 ary2));
11438   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11439 
11440   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11441   ins_encode %{
11442     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11443                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11444                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11445   %}
11446   ins_pipe( pipe_slow );
11447 %}
11448 
11449 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11450                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11451 %{
        // AryEq rule for StrIntrinsicNode::UU (char[] per the format string),
        // used when VM_Version::supports_avx512vlbw() is false.
        // Identical to array_equalsB except the "char" flag passed to
        // arrays_equals() is true.
11452   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11453   match(Set result (AryEq ary1 ary2));
11454   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11455 
11456   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11457   ins_encode %{
11458     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11459                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11460                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
11461   %}
11462   ins_pipe( pipe_slow );
11463 %}
11464 
11465 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
11466                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
11467 %{
        // AryEq rule for StrIntrinsicNode::UU, used when
        // VM_Version::supports_avx512vlbw() is true.
        // Same as array_equalsC, except an opmask temp (ktmp) is reserved and
        // passed to arrays_equals() instead of knoreg.
11468   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11469   match(Set result (AryEq ary1 ary2));
11470   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11471 
11472   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11473   ins_encode %{
11474     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11475                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11476                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
11477   %}
11478   ins_pipe( pipe_slow );
11479 %}
11480 
11481 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
11482                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
11483                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
11484                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
11485                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
11486 %{
        // VectorizedHashCode rule, enabled when UseAVX >= 2.
        // "result" appears on both sides of the match: it is an input
        // (paired with basic_type) and is also the value being set.
        // basic_type is an 8-bit immediate that is cast to BasicType for the
        // macro assembler. ary1 and cnt1 are USE_KILL; thirteen vector registers
        // and three general registers are reserved as TEMPs; flags are killed.
11487   predicate(UseAVX >= 2);
11488   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
11489   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
11490          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
11491          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
11492          USE basic_type, KILL cr);
11493 
11494   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
11495   ins_encode %{
11496     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
11497                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
11498                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
11499                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
11500                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
11501                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
11502                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
11503   %}
11504   ins_pipe( pipe_slow );
11505 %}
11506 
11507 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
11508                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
11509 %{
        // CountPositives rule used when the VM lacks either AVX512VL/BW or BMI2
        // (the exact negation of the count_positives_evex predicate).
        // ary1 and len are USE_KILL inputs; tmp1/tmp2 are vector TEMPs; tmp3 and
        // the flags are KILLed. knoreg is passed for both opmask-register slots
        // of count_positives().
11510   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
11511   match(Set result (CountPositives ary1 len));
11512   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11513 
11514   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11515   ins_encode %{
11516     __ count_positives($ary1$$Register, $len$$Register,
11517                        $result$$Register, $tmp3$$Register,
11518                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
11519   %}
11520   ins_pipe( pipe_slow );
11521 %}
11522 
11523 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
11524                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
11525 %{
        // CountPositives rule used when the VM supports both AVX512VL/BW and
        // BMI2. Same as count_positives, except two opmask temps (ktmp1, ktmp2)
        // are reserved and passed to count_positives() instead of knoreg.
        // The format string does not mention the opmask temps.
11526   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
11527   match(Set result (CountPositives ary1 len));
11528   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11529 
11530   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11531   ins_encode %{
11532     __ count_positives($ary1$$Register, $len$$Register,
11533                        $result$$Register, $tmp3$$Register,
11534                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
11535   %}
11536   ins_pipe( pipe_slow );
11537 %}
11538 
11539 // fast char[] to byte[] compression
11540 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
11541                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // StrCompressedCopy rule used when the VM lacks either AVX512VL/BW or
        // BMI2. src, dst and len are USE_KILL inputs; four vector TEMPs; tmp5
        // and the flags are KILLed. knoreg is passed for both opmask-register
        // slots of char_array_compress().
        // NOTE(review): the format text hard-codes "KILL RAX, RCX, RDX", while
        // the effect list kills src, dst, len, tmp5 and cr - confirm the text.
11542   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
11543   match(Set result (StrCompressedCopy src (Binary dst len)));
11544   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
11545          USE_KILL len, KILL tmp5, KILL cr);
11546 
11547   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11548   ins_encode %{
11549     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11550                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11551                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
11552                            knoreg, knoreg);
11553   %}
11554   ins_pipe( pipe_slow );
11555 %}
11556 
11557 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
11558                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // StrCompressedCopy rule used when the VM supports both AVX512VL/BW and
        // BMI2. Same as string_compress, except two opmask temps (ktmp1, ktmp2)
        // are reserved and passed to char_array_compress() instead of knoreg.
11559   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
11560   match(Set result (StrCompressedCopy src (Binary dst len)));
11561   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
11562          USE_KILL len, KILL tmp5, KILL cr);
11563 
11564   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11565   ins_encode %{
11566     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11567                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11568                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
11569                            $ktmp1$$KRegister, $ktmp2$$KRegister);
11570   %}
11571   ins_pipe( pipe_slow );
11572 %}
11573 // fast byte[] to char[] inflation
11574 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
11575                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
        // StrInflatedCopy rule used when the VM lacks either AVX512VL/BW or
        // BMI2. The match sets a "Universe dummy" operand rather than a register
        // result. src, dst and len are USE_KILL inputs; tmp1 (vector) and tmp2
        // are TEMPs; flags are killed. knoreg is passed in the opmask-register
        // slot of byte_array_inflate().
11576   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
11577   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11578   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11579 
11580   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11581   ins_encode %{
11582     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11583                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
11584   %}
11585   ins_pipe( pipe_slow );
11586 %}
11587 
11588 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
11589                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
        // StrInflatedCopy rule used when the VM supports both AVX512VL/BW and
        // BMI2. Same as string_inflate, except an opmask temp (ktmp) is reserved
        // and passed to byte_array_inflate() instead of knoreg.
11590   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
11591   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11592   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11593 
11594   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11595   ins_encode %{
11596     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11597                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
11598   %}
11599   ins_pipe( pipe_slow );
11600 %}
11601 
11602 // encode char[] to byte[] in ISO_8859_1
11603 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
11604                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
11605                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // EncodeISOArray rule for nodes whose is_ascii() is false.
        // src, dst and len are USE_KILL inputs; four vector TEMPs; tmp5 and the
        // flags are KILLed. The trailing "false" argument to encode_iso_array()
        // is what distinguishes this rule from encode_ascii_array.
11606   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
11607   match(Set result (EncodeISOArray src (Binary dst len)));
11608   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11609 
11610   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
11611   ins_encode %{
11612     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11613                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11614                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
11615   %}
11616   ins_pipe( pipe_slow );
11617 %}
11618 
11619 // encode char[] to byte[] in ASCII
11620 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
11621                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
11622                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // EncodeISOArray rule for nodes whose is_ascii() is true.
        // Same operands and effects as encode_iso_array; it calls the same
        // encode_iso_array() routine but passes "true" as the final argument.
11623   predicate(((EncodeISOArrayNode*)n)->is_ascii());
11624   match(Set result (EncodeISOArray src (Binary dst len)));
11625   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11626 
11627   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
11628   ins_encode %{
11629     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11630                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11631                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
11632   %}
11633   ins_pipe( pipe_slow );
11634 %}
11635 
11636 //----------Overflow Math Instructions-----------------------------------------
11637 
11638 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
11639 %{
        // OverflowAddI (register + register): the rule's result is the flags
        // register (DEF cr). It emits a real addl into op1, so op1 is declared
        // USE_KILL; op2 is only read.
11640   match(Set cr (OverflowAddI op1 op2));
11641   effect(DEF cr, USE_KILL op1, USE op2);
11642 
11643   format %{ "addl    $op1, $op2\t# overflow check int" %}
11644 
11645   ins_encode %{
11646     __ addl($op1$$Register, $op2$$Register);
11647   %}
11648   ins_pipe(ialu_reg_reg);
11649 %}
11650 
11651 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
11652 %{
        // OverflowAddI (register + int immediate): sets the flags via addl of
        // the constant into op1, so op1 is declared USE_KILL.
        // Uses the ialu_reg_reg pipe class even though op2 is an immediate.
11653   match(Set cr (OverflowAddI op1 op2));
11654   effect(DEF cr, USE_KILL op1, USE op2);
11655 
11656   format %{ "addl    $op1, $op2\t# overflow check int" %}
11657 
11658   ins_encode %{
11659     __ addl($op1$$Register, $op2$$constant);
11660   %}
11661   ins_pipe(ialu_reg_reg);
11662 %}
11663 
11664 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
11665 %{
        // OverflowAddL (register + register): 64-bit counterpart of
        // overflowAddI_rReg. Emits addq into op1 (USE_KILL) to define the flags.
11666   match(Set cr (OverflowAddL op1 op2));
11667   effect(DEF cr, USE_KILL op1, USE op2);
11668 
11669   format %{ "addq    $op1, $op2\t# overflow check long" %}
11670   ins_encode %{
11671     __ addq($op1$$Register, $op2$$Register);
11672   %}
11673   ins_pipe(ialu_reg_reg);
11674 %}
11675 
11676 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
11677 %{
        // OverflowAddL (register + immL32 constant): emits addq of the constant
        // into op1 (USE_KILL) to define the flags.
11678   match(Set cr (OverflowAddL op1 op2));
11679   effect(DEF cr, USE_KILL op1, USE op2);
11680 
11681   format %{ "addq    $op1, $op2\t# overflow check long" %}
11682   ins_encode %{
11683     __ addq($op1$$Register, $op2$$constant);
11684   %}
11685   ins_pipe(ialu_reg_reg);
11686 %}
11687 
11688 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11689 %{
        // OverflowSubI (register - register): the flags are produced with cmpl.
        // No effect() clause is declared and op1 may be any int register,
        // unlike the add/neg/mul overflow rules that mark an operand USE_KILL.
11690   match(Set cr (OverflowSubI op1 op2));
11691 
11692   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
11693   ins_encode %{
11694     __ cmpl($op1$$Register, $op2$$Register);
11695   %}
11696   ins_pipe(ialu_reg_reg);
11697 %}
11698 
11699 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11700 %{
        // OverflowSubI (register - int immediate): the flags are produced with
        // cmpl against the constant; no effect() clause is declared.
11701   match(Set cr (OverflowSubI op1 op2));
11702 
11703   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
11704   ins_encode %{
11705     __ cmpl($op1$$Register, $op2$$constant);
11706   %}
11707   ins_pipe(ialu_reg_reg);
11708 %}
11709 
11710 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
11711 %{
        // OverflowSubL (register - register): 64-bit counterpart of
        // overflowSubI_rReg; the flags are produced with cmpq.
11712   match(Set cr (OverflowSubL op1 op2));
11713 
11714   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
11715   ins_encode %{
11716     __ cmpq($op1$$Register, $op2$$Register);
11717   %}
11718   ins_pipe(ialu_reg_reg);
11719 %}
11720 
11721 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
11722 %{
        // OverflowSubL (register - immL32 constant): the flags are produced with
        // cmpq against the constant; no effect() clause is declared.
11723   match(Set cr (OverflowSubL op1 op2));
11724 
11725   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
11726   ins_encode %{
11727     __ cmpq($op1$$Register, $op2$$constant);
11728   %}
11729   ins_pipe(ialu_reg_reg);
11730 %}
11731 
11732 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
11733 %{
        // Negation overflow check: matches OverflowSubI whose first operand is
        // the immI_0 constant (0 - op2) and emits negl on op2. The negl
        // overwrites op2, so it is declared USE_KILL.
11734   match(Set cr (OverflowSubI zero op2));
11735   effect(DEF cr, USE_KILL op2);
11736 
11737   format %{ "negl    $op2\t# overflow check int" %}
11738   ins_encode %{
11739     __ negl($op2$$Register);
11740   %}
11741   ins_pipe(ialu_reg_reg);
11742 %}
11743 
11744 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
11745 %{
        // 64-bit negation overflow check: matches OverflowSubL whose first
        // operand is the immL0 constant and emits negq on op2 (USE_KILL).
11746   match(Set cr (OverflowSubL zero op2));
11747   effect(DEF cr, USE_KILL op2);
11748 
11749   format %{ "negq    $op2\t# overflow check long" %}
11750   ins_encode %{
11751     __ negq($op2$$Register);
11752   %}
11753   ins_pipe(ialu_reg_reg);
11754 %}
11755 
11756 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
11757 %{
        // OverflowMulI (register * register): emits the two-operand imull, which
        // writes the product into op1, so op1 is declared USE_KILL.
11758   match(Set cr (OverflowMulI op1 op2));
11759   effect(DEF cr, USE_KILL op1, USE op2);
11760 
11761   format %{ "imull    $op1, $op2\t# overflow check int" %}
11762   ins_encode %{
11763     __ imull($op1$$Register, $op2$$Register);
11764   %}
11765   ins_pipe(ialu_reg_reg_alu0);
11766 %}
11767 
11768 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
11769 %{
        // OverflowMulI (register * int immediate): uses the three-operand imull
        // with a separate TEMP destination (tmp), so op1 is only read (USE) and
        // does not need a fixed register.
11770   match(Set cr (OverflowMulI op1 op2));
11771   effect(DEF cr, TEMP tmp, USE op1, USE op2);
11772 
11773   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
11774   ins_encode %{
11775     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
11776   %}
11777   ins_pipe(ialu_reg_reg_alu0);
11778 %}
11779 
11780 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
11781 %{
        // OverflowMulL (register * register): 64-bit counterpart of
        // overflowMulI_rReg. Emits imulq into op1 (USE_KILL).
11782   match(Set cr (OverflowMulL op1 op2));
11783   effect(DEF cr, USE_KILL op1, USE op2);
11784 
11785   format %{ "imulq    $op1, $op2\t# overflow check long" %}
11786   ins_encode %{
11787     __ imulq($op1$$Register, $op2$$Register);
11788   %}
11789   ins_pipe(ialu_reg_reg_alu0);
11790 %}
11791 
11792 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
11793 %{
        // OverflowMulL (register * immL32 constant): three-operand imulq into a
        // TEMP destination (tmp); op1 is only read (USE).
11794   match(Set cr (OverflowMulL op1 op2));
11795   effect(DEF cr, TEMP tmp, USE op1, USE op2);
11796 
11797   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
11798   ins_encode %{
11799     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
11800   %}
11801   ins_pipe(ialu_reg_reg_alu0);
11802 %}
11803 
11804 
11805 //----------Control Flow Instructions------------------------------------------
11806 // Signed compare Instructions
11807 
11808 // XXX more variants!!
11809 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
11810 %{
        // Signed int compare, register vs register: CmpI -> cmpl.
        // This is the only compare rule in this group that spells out its
        // effects explicitly (DEF cr, USE op1, USE op2).
11811   match(Set cr (CmpI op1 op2));
11812   effect(DEF cr, USE op1, USE op2);
11813 
11814   format %{ "cmpl    $op1, $op2" %}
11815   ins_encode %{
11816     __ cmpl($op1$$Register, $op2$$Register);
11817   %}
11818   ins_pipe(ialu_cr_reg_reg);
11819 %}
11820 
11821 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
11822 %{
        // Signed int compare, register vs int immediate: CmpI -> cmpl with the
        // constant encoded in the instruction.
11823   match(Set cr (CmpI op1 op2));
11824 
11825   format %{ "cmpl    $op1, $op2" %}
11826   ins_encode %{
11827     __ cmpl($op1$$Register, $op2$$constant);
11828   %}
11829   ins_pipe(ialu_cr_reg_imm);
11830 %}
11831 
11832 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
11833 %{
        // Signed int compare, register vs memory: folds the LoadI of op2 into
        // the cmpl as a memory operand. Carries an explicit ins_cost of 500.
11834   match(Set cr (CmpI op1 (LoadI op2)));
11835 
11836   ins_cost(500); // XXX
11837   format %{ "cmpl    $op1, $op2" %}
11838   ins_encode %{
11839     __ cmpl($op1$$Register, $op2$$Address);
11840   %}
11841   ins_pipe(ialu_cr_reg_mem);
11842 %}
11843 
11844 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
11845 %{
        // Signed int compare against the constant zero (immI_0): emitted as
        // testl src, src instead of a cmpl with an immediate.
11846   match(Set cr (CmpI src zero));
11847 
11848   format %{ "testl   $src, $src" %}
11849   ins_encode %{
11850     __ testl($src$$Register, $src$$Register);
11851   %}
11852   ins_pipe(ialu_cr_reg_imm);
11853 %}
11854 
11855 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
11856 %{
        // Matches the whole subtree CmpI (AndI src con) 0 and emits a single
        // testl src, con; no separate AndI result is produced by this rule.
11857   match(Set cr (CmpI (AndI src con) zero));
11858 
11859   format %{ "testl   $src, $con" %}
11860   ins_encode %{
11861     __ testl($src$$Register, $con$$constant);
11862   %}
11863   ins_pipe(ialu_cr_reg_imm);
11864 %}
11865 
11866 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
11867 %{
        // Matches CmpI (AndI src1 src2) 0 and emits a single testl src1, src2.
        // Uses the ialu_cr_reg_imm pipe class although both inputs are registers.
11868   match(Set cr (CmpI (AndI src1 src2) zero));
11869 
11870   format %{ "testl   $src1, $src2" %}
11871   ins_encode %{
11872     __ testl($src1$$Register, $src2$$Register);
11873   %}
11874   ins_pipe(ialu_cr_reg_imm);
11875 %}
11876 
11877 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
11878 %{
        // Matches CmpI (AndI src (LoadI mem)) 0 and emits a single testl with
        // the load folded in as a memory operand.
11879   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11880 
11881   format %{ "testl   $src, $mem" %}
11882   ins_encode %{
11883     __ testl($src$$Register, $mem$$Address);
11884   %}
11885   ins_pipe(ialu_cr_reg_mem);
11886 %}
11887 
11888 // Unsigned compare Instructions; really, same as signed except they
11889 // produce an rFlagsRegU instead of rFlagsReg.
11890 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11891 %{
        // Unsigned int compare, register vs register: CmpU -> cmpl. The emitted
        // instruction is the same as in compI_rReg; the difference is that the
        // result operand is rFlagsRegU.
11892   match(Set cr (CmpU op1 op2));
11893 
11894   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11895   ins_encode %{
11896     __ cmpl($op1$$Register, $op2$$Register);
11897   %}
11898   ins_pipe(ialu_cr_reg_reg);
11899 %}
11900 
11901 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11902 %{
        // Unsigned int compare, register vs int immediate: CmpU -> cmpl with the
        // constant, producing rFlagsRegU.
11903   match(Set cr (CmpU op1 op2));
11904 
11905   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11906   ins_encode %{
11907     __ cmpl($op1$$Register, $op2$$constant);
11908   %}
11909   ins_pipe(ialu_cr_reg_imm);
11910 %}
11911 
// Unsigned int compare, register vs memory.
// Matches (CmpU op1 (LoadI op2)), folding the load into `cmpl op1, [mem]`.
// Declares an explicit ins_cost of 500 (marked XXX by the original author).
11912 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
11913 %{
11914   match(Set cr (CmpU op1 (LoadI op2)));
11915 
11916   ins_cost(500); // XXX
11917   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11918   ins_encode %{
11919     __ cmpl($op1$$Register, $op2$$Address);
11920   %}
11921   ins_pipe(ialu_cr_reg_mem);
11922 %}
11923 
// Unsigned int compare against the `zero` operand (immI_0).
// Matches (CmpU src zero) and emits `testl src, src`; flags are produced
// in an rFlagsRegU.
11924 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
11925 %{
11926   match(Set cr (CmpU src zero));
11927 
11928   format %{ "testl   $src, $src\t# unsigned" %}
11929   ins_encode %{
11930     __ testl($src$$Register, $src$$Register);
11931   %}
11932   ins_pipe(ialu_cr_reg_imm);
11933 %}
11934 
// Pointer compare, register vs register.
// Matches (CmpP op1 op2) and emits the 64-bit `cmpq op1, op2`; the result
// is an rFlagsRegU (pointer compares are treated as unsigned).
11935 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
11936 %{
11937   match(Set cr (CmpP op1 op2));
11938 
11939   format %{ "cmpq    $op1, $op2\t# ptr" %}
11940   ins_encode %{
11941     __ cmpq($op1$$Register, $op2$$Register);
11942   %}
11943   ins_pipe(ialu_cr_reg_reg);
11944 %}
11945 
// Pointer compare, register vs memory.
// Matches (CmpP op1 (LoadP op2)), folding the load into `cmpq op1, [mem]`.
// Only applies when the LoadP (second input of the CmpP) carries no GC
// barrier data, i.e. barrier_data() == 0. Explicit ins_cost of 500 (XXX).
11946 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
11947 %{
11948   match(Set cr (CmpP op1 (LoadP op2)));
11949   predicate(n->in(2)->as_Load()->barrier_data() == 0);
11950 
11951   ins_cost(500); // XXX
11952   format %{ "cmpq    $op1, $op2\t# ptr" %}
11953   ins_encode %{
11954     __ cmpq($op1$$Register, $op2$$Address);
11955   %}
11956   ins_pipe(ialu_cr_reg_mem);
11957 %}
11958 
11959 // XXX this is generalized by compP_rReg_mem???
11960 // Compare raw pointer (used in out-of-heap check).
11961 // Only works because non-oop pointers must be raw pointers
11962 // and raw pointers have no anti-dependencies.
// Raw-pointer variant of the register-vs-memory pointer compare.
// Same match shape and encoding as compP_rReg_mem, but the predicate also
// requires that in(2) of the LoadP (presumably its address input -- confirm)
// has a bottom type whose reloc() is relocInfo::none, in addition to the
// load having no barrier data. Unlike compP_rReg_mem, no explicit ins_cost
// is declared here.
11963 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
11964 %{
11965   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
11966             n->in(2)->as_Load()->barrier_data() == 0);
11967   match(Set cr (CmpP op1 (LoadP op2)));
11968 
11969   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
11970   ins_encode %{
11971     __ cmpq($op1$$Register, $op2$$Address);
11972   %}
11973   ins_pipe(ialu_cr_reg_mem);
11974 %}
11975 
11976 // This will generate a signed flags result. This should be OK since
11977 // any compare to a zero should be eq/neq.
// Pointer compare against the `zero` operand (immP0).
// Matches (CmpP src zero) and emits `testq src, src`. Note the result is a
// signed rFlagsReg, not the rFlagsRegU produced by the compP_* rules.
11978 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
11979 %{
11980   match(Set cr (CmpP src zero));
11981 
11982   format %{ "testq   $src, $src\t# ptr" %}
11983   ins_encode %{
11984     __ testq($src$$Register, $src$$Register);
11985   %}
11986   ins_pipe(ialu_cr_reg_imm);
11987 %}
11988 
11989 // This will generate a signed flags result. This should be OK since
11990 // any compare to a zero should be eq/neq.
// Null test of a pointer loaded from memory, without loading it into a
// register: matches (CmpP (LoadP op) zero) and emits `testq [mem], imm`.
// Selected when compressed oops are off or the compressed-oops base is
// non-null (testP_mem_reg0 covers the complementary case), and only when the
// load has no barrier data.
// NOTE(review): the format string prints a 64-bit all-ones mask while the
// encoding passes 0xFFFFFFFF; presumably the 32-bit immediate is
// sign-extended by testq -- confirm against Assembler::testq(Address, int32_t).
// NOTE(review): the pipeline class is ialu_cr_reg_imm although the operand
// is a memory operand.
11991 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
11992 %{
11993   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
11994             n->in(1)->as_Load()->barrier_data() == 0);
11995   match(Set cr (CmpP (LoadP op) zero));
11996 
11997   ins_cost(500); // XXX
11998   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
11999   ins_encode %{
12000     __ testq($op$$Address, 0xFFFFFFFF);
12001   %}
12002   ins_pipe(ialu_cr_reg_imm);
12003 %}
12004 
// Null test of a pointer loaded from memory for the case where compressed
// oops are enabled and CompressedOops::base() is null. Per the format
// string, r12 (the heap-base register) then holds zero, so the rule emits
// `cmpq r12, [mem]` instead of a test with an immediate.
// Only applies when the load has no barrier data.
12005 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
12006 %{
12007   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
12008             n->in(1)->as_Load()->barrier_data() == 0);
12009   match(Set cr (CmpP (LoadP mem) zero));
12010 
12011   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
12012   ins_encode %{
12013     __ cmpq(r12, $mem$$Address);
12014   %}
12015   ins_pipe(ialu_cr_reg_mem);
12016 %}
12017 
// Compressed (narrow) pointer compare, register vs register.
// Matches (CmpN op1 op2) and emits the 32-bit `cmpl op1, op2`; flags are
// produced in an rFlagsRegU.
12018 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
12019 %{
12020   match(Set cr (CmpN op1 op2));
12021 
12022   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12023   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
12024   ins_pipe(ialu_cr_reg_reg);
12025 %}
12026 
// Compressed pointer compare, register vs memory.
// Matches (CmpN src (LoadN mem)), folding the narrow load into
// `cmpl src, [mem]`; flags are produced in an rFlagsRegU.
12027 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
12028 %{
12029   match(Set cr (CmpN src (LoadN mem)));
12030 
12031   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
12032   ins_encode %{
12033     __ cmpl($src$$Register, $mem$$Address);
12034   %}
12035   ins_pipe(ialu_cr_reg_mem);
12036 %}
12037 
// Compressed pointer compare, register vs narrow-oop constant.
// Matches (CmpN op1 op2) with op2 an immN and delegates the encoding to the
// cmp_narrow_oop macro-assembler helper, passing the constant as a jobject.
12038 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
12039   match(Set cr (CmpN op1 op2));
12040 
12041   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
12042   ins_encode %{
12043     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
12044   %}
12045   ins_pipe(ialu_cr_reg_imm);
12046 %}
12047 
// Compressed pointer compare, memory vs narrow-oop constant.
// Matches (CmpN src (LoadN mem)) with src an immN and emits the compare via
// cmp_narrow_oop on the memory operand.
// NOTE(review): the emitted compare is `mem` vs `src`, i.e. the operands are
// in the opposite order from the match rule (src, load). Presumably this is
// safe because consumers only test equality/inequality -- confirm.
12048 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
12049 %{
12050   match(Set cr (CmpN src (LoadN mem)));
12051 
12052   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
12053   ins_encode %{
12054     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
12055   %}
12056   ins_pipe(ialu_cr_reg_mem);
12057 %}
12058 
// Compressed klass pointer compare, register vs narrow-klass constant.
// Matches (CmpN op1 op2) with op2 an immNKlass and delegates the encoding to
// cmp_narrow_klass, passing the constant as a Klass*.
12059 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
12060   match(Set cr (CmpN op1 op2));
12061 
12062   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
12063   ins_encode %{
12064     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
12065   %}
12066   ins_pipe(ialu_cr_reg_imm);
12067 %}
12068 
// Compressed klass pointer compare, memory vs narrow-klass constant.
// Matches (CmpN src (LoadNKlass mem)) with src an immNKlass and emits the
// compare via cmp_narrow_klass on the memory operand.
// NOTE(review): as in compN_mem_imm, the emitted operand order (mem, src) is
// the reverse of the match rule's (src, load) -- confirm consumers only
// depend on equality.
12069 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
12070 %{
12071   match(Set cr (CmpN src (LoadNKlass mem)));
12072 
12073   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
12074   ins_encode %{
12075     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
12076   %}
12077   ins_pipe(ialu_cr_reg_mem);
12078 %}
12079 
// Compressed pointer compare against the `zero` operand (immN0).
// Matches (CmpN src zero) and emits `testl src, src`; the result is a
// signed rFlagsReg.
12080 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
12081   match(Set cr (CmpN src zero));
12082 
12083   format %{ "testl   $src, $src\t# compressed ptr" %}
12084   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
12085   ins_pipe(ialu_cr_reg_imm);
12086 %}
12087 
// Null test of a compressed pointer loaded from memory, without loading it
// into a register: matches (CmpN (LoadN mem) zero) and emits
// `testl [mem], 0xffffffff`. Selected when CompressedOops::base() is
// non-null; testN_mem_reg0 covers the null-base case.
//
// The encoding must be a TEST, not a CMP: `cmpl [mem], -1` would set ZF only
// when the loaded value equals 0xFFFFFFFF, whereas this rule stands for a
// compare against zero. TEST with an all-ones mask sets ZF exactly when the
// loaded value is zero, which agrees with the format string below and with
// the 64-bit sibling testP_mem (testq with an all-ones mask).
12088 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
12089 %{
12090   predicate(CompressedOops::base() != nullptr);
12091   match(Set cr (CmpN (LoadN mem) zero));
12092 
12093   ins_cost(500); // XXX
12094   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
12095   ins_encode %{
12096     __ testl($mem$$Address, (int)0xFFFFFFFF);
12097   %}
12098   ins_pipe(ialu_cr_reg_mem);
12099 %}
12100 
// Null test of a compressed pointer loaded from memory for the case where
// CompressedOops::base() is null. Per the format string, r12 (the heap-base
// register) then holds zero, so the rule emits `cmpl r12, [mem]`.
12101 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
12102 %{
12103   predicate(CompressedOops::base() == nullptr);
12104   match(Set cr (CmpN (LoadN mem) zero));
12105 
12106   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
12107   ins_encode %{
12108     __ cmpl(r12, $mem$$Address);
12109   %}
12110   ins_pipe(ialu_cr_reg_mem);
12111 %}
12112 
12113 // Yanked all unsigned pointer compare operations.
12114 // Pointer compares are done with CmpP which is already unsigned.
12115 
// Signed long compare, register vs register.
// Matches (CmpL op1 op2) and emits `cmpq op1, op2`; flags go to rFlagsReg.
12116 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
12117 %{
12118   match(Set cr (CmpL op1 op2));
12119 
12120   format %{ "cmpq    $op1, $op2" %}
12121   ins_encode %{
12122     __ cmpq($op1$$Register, $op2$$Register);
12123   %}
12124   ins_pipe(ialu_cr_reg_reg);
12125 %}
12126 
// Signed long compare, register vs immediate.
// Matches (CmpL op1 op2) where op2 is an immL32 operand (a long constant
// restricted by that operand's definition elsewhere in this file) and emits
// `cmpq op1, imm`.
12127 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
12128 %{
12129   match(Set cr (CmpL op1 op2));
12130 
12131   format %{ "cmpq    $op1, $op2" %}
12132   ins_encode %{
12133     __ cmpq($op1$$Register, $op2$$constant);
12134   %}
12135   ins_pipe(ialu_cr_reg_imm);
12136 %}
12137 
// Signed long compare, register vs memory.
// Matches (CmpL op1 (LoadL op2)), folding the load into `cmpq op1, [mem]`.
12138 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
12139 %{
12140   match(Set cr (CmpL op1 (LoadL op2)));
12141 
12142   format %{ "cmpq    $op1, $op2" %}
12143   ins_encode %{
12144     __ cmpq($op1$$Register, $op2$$Address);
12145   %}
12146   ins_pipe(ialu_cr_reg_mem);
12147 %}
12148 
// Signed long compare against the `zero` operand (immL0).
// Matches (CmpL src zero) and emits `testq src, src`.
12149 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
12150 %{
12151   match(Set cr (CmpL src zero));
12152 
12153   format %{ "testq   $src, $src" %}
12154   ins_encode %{
12155     __ testq($src$$Register, $src$$Register);
12156   %}
12157   ins_pipe(ialu_cr_reg_imm);
12158 %}
12159 
// Fused and-with-immediate + compare-to-zero for longs.
// Matches (CmpL (AndL src con) zero) with con an immL32 and emits
// `testq src, con`; only the flags in cr are produced.
12160 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
12161 %{
12162   match(Set cr (CmpL (AndL src con) zero));
12163 
12164   format %{ "testq   $src, $con\t# long" %}
12165   ins_encode %{
12166     __ testq($src$$Register, $con$$constant);
12167   %}
12168   ins_pipe(ialu_cr_reg_imm);
12169 %}
12170 
// Fused register-and-register + compare-to-zero for longs.
// Matches (CmpL (AndL src1 src2) zero) and emits `testq src1, src2`.
// NOTE(review): the pipeline class is ialu_cr_reg_imm although both inputs
// are registers -- confirm this is intended.
12171 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
12172 %{
12173   match(Set cr (CmpL (AndL src1 src2) zero));
12174 
12175   format %{ "testq   $src1, $src2\t# long" %}
12176   ins_encode %{
12177     __ testq($src1$$Register, $src2$$Register);
12178   %}
12179   ins_pipe(ialu_cr_reg_imm);
12180 %}
12181 
// Fused and-with-memory + compare-to-zero for longs.
// Matches (CmpL (AndL src (LoadL mem)) zero), folding the load into the
// memory operand of `testq src, [mem]`.
12182 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12183 %{
12184   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12185 
12186   format %{ "testq   $src, $mem" %}
12187   ins_encode %{
12188     __ testq($src$$Register, $mem$$Address);
12189   %}
12190   ins_pipe(ialu_cr_reg_mem);
12191 %}
12192 
// Variant of testL_reg_mem whose register input is a pointer cast to an
// integer: matches (CmpL (AndL (CastP2X src) (LoadL mem)) zero) and emits
// the same `testq src, [mem]`, using the pointer register directly so the
// CastP2X needs no instruction of its own.
12193 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
12194 %{
12195   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
12196 
12197   format %{ "testq   $src, $mem" %}
12198   ins_encode %{
12199     __ testq($src$$Register, $mem$$Address);
12200   %}
12201   ins_pipe(ialu_cr_reg_mem);
12202 %}
12203 
12204 // Manifest a CmpU result in an integer register.  Very painful.
12205 // This is the test to avoid.
// Three-way unsigned int compare materialized in an int register.
// Matches (CmpU3 src1 src2) and kills the flags register. Emitted sequence:
//   cmpl   src1, src2
//   movl   dst, -1        ; assume "below"
//   jb     done           ; unsigned below -> keep -1
//   setne  dst            ; otherwise low byte = (src1 != src2)
//   movzbl dst, dst       ; widen that byte to 0 or 1
// The format string is tagged "# CmpU3" so the printed listing names the
// node this rule actually matches (it previously carried the CmpL3 tag).
12206 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
12207 %{
12208   match(Set dst (CmpU3 src1 src2));
12209   effect(KILL flags);
12210 
12211   ins_cost(275); // XXX
12212   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
12213             "movl    $dst, -1\n\t"
12214             "jb,u    done\n\t"
12215             "setne   $dst\n\t"
12216             "movzbl  $dst, $dst\n\t"
12217     "done:" %}
12218   ins_encode %{
12219     Label done;
12220     __ cmpl($src1$$Register, $src2$$Register);
12221     __ movl($dst$$Register, -1);
12222     __ jccb(Assembler::below, done);
12223     __ setb(Assembler::notZero, $dst$$Register);
12224     __ movzbl($dst$$Register, $dst$$Register);
12225     __ bind(done);
12226   %}
12227   ins_pipe(pipe_slow);
12228 %}
12229 
12230 // Manifest a CmpL result in an integer register.  Very painful.
12231 // This is the test to avoid.
// Three-way signed long compare materialized in an int register.
// Matches (CmpL3 src1 src2) and kills the flags register. Emitted sequence:
//   cmpq   src1, src2
//   movl   dst, -1        ; assume "less"
//   jl     done           ; signed less -> keep -1
//   setne  dst            ; otherwise low byte = (src1 != src2)
//   movzbl dst, dst       ; widen that byte to 0 or 1
12232 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12233 %{
12234   match(Set dst (CmpL3 src1 src2));
12235   effect(KILL flags);
12236 
12237   ins_cost(275); // XXX
12238   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
12239             "movl    $dst, -1\n\t"
12240             "jl,s    done\n\t"
12241             "setne   $dst\n\t"
12242             "movzbl  $dst, $dst\n\t"
12243     "done:" %}
12244   ins_encode %{
12245     Label done;
12246     __ cmpq($src1$$Register, $src2$$Register);
12247     __ movl($dst$$Register, -1);
12248     __ jccb(Assembler::less, done);
12249     __ setb(Assembler::notZero, $dst$$Register);
12250     __ movzbl($dst$$Register, $dst$$Register);
12251     __ bind(done);
12252   %}
12253   ins_pipe(pipe_slow);
12254 %}
12255 
12256 // Manifest a CmpUL result in an integer register.  Very painful.
12257 // This is the test to avoid.
// Three-way unsigned long compare materialized in an int register.
// Matches (CmpUL3 src1 src2) and kills the flags register. Emitted sequence:
//   cmpq   src1, src2
//   movl   dst, -1        ; assume "below"
//   jb     done           ; unsigned below -> keep -1
//   setne  dst            ; otherwise low byte = (src1 != src2)
//   movzbl dst, dst       ; widen that byte to 0 or 1
// The format string is tagged "# CmpUL3" so the printed listing names the
// node this rule actually matches (it previously carried the CmpL3 tag).
12258 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12259 %{
12260   match(Set dst (CmpUL3 src1 src2));
12261   effect(KILL flags);
12262 
12263   ins_cost(275); // XXX
12264   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
12265             "movl    $dst, -1\n\t"
12266             "jb,u    done\n\t"
12267             "setne   $dst\n\t"
12268             "movzbl  $dst, $dst\n\t"
12269     "done:" %}
12270   ins_encode %{
12271     Label done;
12272     __ cmpq($src1$$Register, $src2$$Register);
12273     __ movl($dst$$Register, -1);
12274     __ jccb(Assembler::below, done);
12275     __ setb(Assembler::notZero, $dst$$Register);
12276     __ movzbl($dst$$Register, $dst$$Register);
12277     __ bind(done);
12278   %}
12279   ins_pipe(pipe_slow);
12280 %}
12281 
12282 // Unsigned long compare Instructions; really, same as signed long except they
12283 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned long compare, register vs register.
// Matches (CmpUL op1 op2) and emits `cmpq op1, op2`; flags are produced in
// an rFlagsRegU.
12284 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
12285 %{
12286   match(Set cr (CmpUL op1 op2));
12287 
12288   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12289   ins_encode %{
12290     __ cmpq($op1$$Register, $op2$$Register);
12291   %}
12292   ins_pipe(ialu_cr_reg_reg);
12293 %}
12294 
// Unsigned long compare, register vs immediate.
// Matches (CmpUL op1 op2) with op2 an immL32 operand and emits
// `cmpq op1, imm`; flags are produced in an rFlagsRegU.
12295 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
12296 %{
12297   match(Set cr (CmpUL op1 op2));
12298 
12299   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12300   ins_encode %{
12301     __ cmpq($op1$$Register, $op2$$constant);
12302   %}
12303   ins_pipe(ialu_cr_reg_imm);
12304 %}
12305 
// Unsigned long compare, register vs memory.
// Matches (CmpUL op1 (LoadL op2)), folding the load into `cmpq op1, [mem]`;
// flags are produced in an rFlagsRegU.
12306 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
12307 %{
12308   match(Set cr (CmpUL op1 (LoadL op2)));
12309 
12310   format %{ "cmpq    $op1, $op2\t# unsigned" %}
12311   ins_encode %{
12312     __ cmpq($op1$$Register, $op2$$Address);
12313   %}
12314   ins_pipe(ialu_cr_reg_mem);
12315 %}
12316 
// Unsigned long compare against the `zero` operand (immL0).
// Matches (CmpUL src zero) and emits `testq src, src`; flags are produced
// in an rFlagsRegU.
12317 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
12318 %{
12319   match(Set cr (CmpUL src zero));
12320 
12321   format %{ "testq   $src, $src\t# unsigned" %}
12322   ins_encode %{
12323     __ testq($src$$Register, $src$$Register);
12324   %}
12325   ins_pipe(ialu_cr_reg_imm);
12326 %}
12327 
// Byte compare of a memory operand against an 8-bit immediate.
// Matches (CmpI (LoadB mem) imm) with imm an immI8 and emits a single
// `cmpb [mem], imm`, so the byte is not loaded into a register first.
12328 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
12329 %{
12330   match(Set cr (CmpI (LoadB mem) imm));
12331 
12332   ins_cost(125);
12333   format %{ "cmpb    $mem, $imm" %}
12334   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
12335   ins_pipe(ialu_cr_reg_mem);
12336 %}
12337 
// Bit test of an unsigned byte in memory against an immediate mask.
// Matches (CmpI (AndI (LoadUB mem) imm) zero) with imm an immU7 operand and
// emits a single `testb [mem], imm`.
12338 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
12339 %{
12340   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
12341 
12342   ins_cost(125);
12343   format %{ "testb   $mem, $imm\t# ubyte" %}
12344   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
12345   ins_pipe(ialu_cr_reg_mem);
12346 %}
12347 
// Bit test of a signed byte in memory against an immediate mask.
// Matches (CmpI (AndI (LoadB mem) imm) zero) with imm an immI8 operand and
// emits a single `testb [mem], imm`.
12348 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
12349 %{
12350   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
12351 
12352   ins_cost(125);
12353   format %{ "testb   $mem, $imm\t# byte" %}
12354   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
12355   ins_pipe(ialu_cr_reg_mem);
12356 %}
12357 
12358 //----------Max and Min--------------------------------------------------------
12359 // Min Instructions
12360 
// Helper rule with no match clause: conditional move "if greater".
// It is only reachable through an expand block (minI_rReg uses it). dst is
// both read and written (USE_DEF); src and the flags in cr are read.
// Emits `cmovl greater, dst, src`.
12361 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
12362 %{
12363   effect(USE_DEF dst, USE src, USE cr);
12364 
12365   format %{ "cmovlgt $dst, $src\t# min" %}
12366   ins_encode %{
12367     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
12368   %}
12369   ins_pipe(pipe_cmov_reg);
12370 %}
12371 
12372 
// Integer minimum, dst = min(dst, src).
// Matches (MinI dst src) and expands into two rules sharing a temporary
// flags register: compI_rReg compares dst with src, then cmovI_reg_g
// replaces dst with src when the compare reported "greater".
12373 instruct minI_rReg(rRegI dst, rRegI src)
12374 %{
12375   match(Set dst (MinI dst src));
12376 
12377   ins_cost(200);
12378   expand %{
12379     rFlagsReg cr;
12380     compI_rReg(cr, dst, src);
12381     cmovI_reg_g(dst, src, cr);
12382   %}
12383 %}
12384 
// Helper rule with no match clause: conditional move "if less".
// It is only reachable through an expand block (maxI_rReg uses it). dst is
// both read and written (USE_DEF); src and the flags in cr are read.
// Emits `cmovl less, dst, src`.
12385 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
12386 %{
12387   effect(USE_DEF dst, USE src, USE cr);
12388 
12389   format %{ "cmovllt $dst, $src\t# max" %}
12390   ins_encode %{
12391     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
12392   %}
12393   ins_pipe(pipe_cmov_reg);
12394 %}
12395 
12396 
// Integer maximum, dst = max(dst, src).
// Matches (MaxI dst src) and expands into two rules sharing a temporary
// flags register: compI_rReg compares dst with src, then cmovI_reg_l
// replaces dst with src when the compare reported "less".
12397 instruct maxI_rReg(rRegI dst, rRegI src)
12398 %{
12399   match(Set dst (MaxI dst src));
12400 
12401   ins_cost(200);
12402   expand %{
12403     rFlagsReg cr;
12404     compI_rReg(cr, dst, src);
12405     cmovI_reg_l(dst, src, cr);
12406   %}
12407 %}
12408 
12409 // ============================================================================
12410 // Branch Instructions
12411 
12412 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional branch (long form) for the Goto node.
// The encoding passes `false` as jmp's second argument to force the long
// encoding, which is what makes the declared fixed size(5) valid.
// jmpDir_short is the 2-byte replacement used when the offset fits.
12413 instruct jmpDir(label labl)
12414 %{
12415   match(Goto);
12416   effect(USE labl);
12417 
12418   ins_cost(300);
12419   format %{ "jmp     $labl" %}
12420   size(5);
12421   ins_encode %{
12422     Label* L = $labl$$label;
12423     __ jmp(*L, false); // Always long jump
12424   %}
12425   ins_pipe(pipe_jmp);
12426 %}
12427 
12428 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch (long form) on signed flags for the If node.
// The condition code comes from the cmpOp operand ($cop$$cmpcode); jcc is
// forced to its long encoding so the declared size(6) holds.
12429 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
12430 %{
12431   match(If cop cr);
12432   effect(USE labl);
12433 
12434   ins_cost(300);
12435   format %{ "j$cop     $labl" %}
12436   size(6);
12437   ins_encode %{
12438     Label* L = $labl$$label;
12439     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12440   %}
12441   ins_pipe(pipe_jcc);
12442 %}
12443 
12444 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional back-branch (long form) for the CountedLoopEnd node.
// Same encoding as jmpCon; only the matched ideal node and the format
// annotation ("loop end") differ.
12445 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
12446 %{
12447   match(CountedLoopEnd cop cr);
12448   effect(USE labl);
12449 
12450   ins_cost(300);
12451   format %{ "j$cop     $labl\t# loop end" %}
12452   size(6);
12453   ins_encode %{
12454     Label* L = $labl$$label;
12455     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12456   %}
12457   ins_pipe(pipe_jcc);
12458 %}
12459 
12460 // Jump Direct Conditional - using unsigned comparison
// Conditional branch (long form) on unsigned flags for the If node.
// Takes a cmpOpU condition and an rFlagsRegU input; the encoding is the
// same forced-long jcc as jmpCon, with fixed size(6).
12461 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12462   match(If cop cmp);
12463   effect(USE labl);
12464 
12465   ins_cost(300);
12466   format %{ "j$cop,u   $labl" %}
12467   size(6);
12468   ins_encode %{
12469     Label* L = $labl$$label;
12470     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12471   %}
12472   ins_pipe(pipe_jcc);
12473 %}
12474 
// Conditional branch (long form) on rFlagsRegUCF flags with a cmpOpUCF
// condition, emitted as a single forced-long jcc with fixed size(6).
// NOTE(review): ins_cost is 200 here while the other long branches and
// jmpConUCF_short declare 300 -- presumably deliberate to prefer this rule
// when several If rules match; confirm.
12475 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12476   match(If cop cmp);
12477   effect(USE labl);
12478 
12479   ins_cost(200);
12480   format %{ "j$cop,u   $labl" %}
12481   size(6);
12482   ins_encode %{
12483     Label* L = $labl$$label;
12484     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12485   %}
12486   ins_pipe(pipe_jcc);
12487 %}
12488 
// Two-instruction conditional branch (long form) on rFlagsRegUCF flags for
// cmpOpUCF2 conditions, which must be either equal or notEqual:
//   notEqual: branch to the target on parity, then again on notEqual
//             (either condition takes the branch).
//   equal:    skip over the branch on parity, otherwise branch on equal.
// Any other condition code hits ShouldNotReachHere(). No size() is declared
// for this rule; the two cases emit different instruction sequences.
// NOTE(review): parity presumably signals an unordered floating-point
// compare -- confirm against the producers of rFlagsRegUCF.
12489 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12490   match(If cop cmp);
12491   effect(USE labl);
12492 
12493   ins_cost(200);
12494   format %{ $$template
12495     if ($cop$$cmpcode == Assembler::notEqual) {
12496       $$emit$$"jp,u    $labl\n\t"
12497       $$emit$$"j$cop,u   $labl"
12498     } else {
12499       $$emit$$"jp,u    done\n\t"
12500       $$emit$$"j$cop,u   $labl\n\t"
12501       $$emit$$"done:"
12502     }
12503   %}
12504   ins_encode %{
12505     Label* l = $labl$$label;
12506     if ($cop$$cmpcode == Assembler::notEqual) {
12507       __ jcc(Assembler::parity, *l, false);
12508       __ jcc(Assembler::notEqual, *l, false);
12509     } else if ($cop$$cmpcode == Assembler::equal) {
12510       Label done;
12511       __ jccb(Assembler::parity, done);
12512       __ jcc(Assembler::equal, *l, false);
12513       __ bind(done);
12514     } else {
12515        ShouldNotReachHere();
12516     }
12517   %}
12518   ins_pipe(pipe_jcc);
12519 %}
12520 
12521 // ============================================================================
12522 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
12523 // superklass array for an instance of the superklass.  Set a hidden
12524 // internal cache on a hit (cache is checked with exposed code in
12525 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
12526 // encoding ALSO sets flags.
12527 
// Slow-path subtype check that produces its result in a register.
// Operands are pinned to fixed registers: result in rdi, sub in rsi, super
// in rax; rcx and the flags are killed. The format string documents the
// intended sequence (scan the secondary supers array with repne scasq,
// update the secondary super cache on a hit, zero the result on a hit).
// The actual bytes come from the enc_PartialSubtypeCheck encoding class
// defined elsewhere in this file; opcode(0x1) is the parameter that makes
// that encoding also emit the XOR of RDI.
12528 instruct partialSubtypeCheck(rdi_RegP result,
12529                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12530                              rFlagsReg cr)
12531 %{
12532   match(Set result (PartialSubtypeCheck sub super));
12533   effect(KILL rcx, KILL cr);
12534 
12535   ins_cost(1100);  // slightly larger than the next version
12536   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
12537             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
12538             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
12539             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
12540             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
12541             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
12542             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
12543     "miss:\t" %}
12544 
12545   opcode(0x1); // Force a XOR of RDI
12546   ins_encode(enc_PartialSubtypeCheck());
12547   ins_pipe(pipe_slow);
12548 %}
12549 
// Slow-path subtype check whose result is consumed only as flags.
// Matches (CmpP (PartialSubtypeCheck sub super) zero), so the compare
// against null is folded into the check itself. rdi (result) is still
// clobbered and therefore listed as KILL together with rcx. Shares the
// enc_PartialSubtypeCheck encoding with partialSubtypeCheck, but passes
// opcode(0x0) so the XOR of RDI is not emitted. Its cost (1000) is lower
// than partialSubtypeCheck's (1100).
12550 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
12551                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
12552                                      immP0 zero,
12553                                      rdi_RegP result)
12554 %{
12555   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12556   effect(KILL rcx, KILL result);
12557 
12558   ins_cost(1000);
12559   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
12560             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
12561             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
12562             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
12563             "jne,s   miss\t\t# Missed: flags nz\n\t"
12564             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
12565     "miss:\t" %}
12566 
12567   opcode(0x0); // No need to XOR RDI
12568   ins_encode(enc_PartialSubtypeCheck());
12569   ins_pipe(pipe_slow);
12570 %}
12571 
12572 // ============================================================================
12573 // Branch Instructions -- short offset versions
12574 //
12575 // These instructions are used to replace jumps of a long offset (the default
12576 // match) with jumps of a shorter offset.  These instructions are all tagged
12577 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12578 // match rules in general matching.  Instead, the ADLC generates a conversion
12579 // method in the MachNode which can be used to do in-place replacement of the
12580 // long variant with the shorter variant.  The compiler will determine if a
12581 // branch can be taken by the is_short_branch_offset() predicate in the machine
12582 // specific code section of the file.
12583 
12584 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: same match (Goto), but encoded with
// jmpb and a fixed size(2). ins_short_branch(1) tags it as a short-branch
// variant, so it is substituted for the long form rather than matched
// directly.
12585 instruct jmpDir_short(label labl) %{
12586   match(Goto);
12587   effect(USE labl);
12588 
12589   ins_cost(300);
12590   format %{ "jmp,s   $labl" %}
12591   size(2);
12592   ins_encode %{
12593     Label* L = $labl$$label;
12594     __ jmpb(*L);
12595   %}
12596   ins_pipe(pipe_jmp);
12597   ins_short_branch(1);
12598 %}
12599 
12600 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: same match (If on signed flags), but
// encoded with jccb and a fixed size(2); tagged with ins_short_branch(1).
12601 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
12602   match(If cop cr);
12603   effect(USE labl);
12604 
12605   ins_cost(300);
12606   format %{ "j$cop,s   $labl" %}
12607   size(2);
12608   ins_encode %{
12609     Label* L = $labl$$label;
12610     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12611   %}
12612   ins_pipe(pipe_jcc);
12613   ins_short_branch(1);
12614 %}
12615 
12616 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd: same match (CountedLoopEnd), but
// encoded with jccb and a fixed size(2); tagged with ins_short_branch(1).
12617 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
12618   match(CountedLoopEnd cop cr);
12619   effect(USE labl);
12620 
12621   ins_cost(300);
12622   format %{ "j$cop,s   $labl\t# loop end" %}
12623   size(2);
12624   ins_encode %{
12625     Label* L = $labl$$label;
12626     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12627   %}
12628   ins_pipe(pipe_jcc);
12629   ins_short_branch(1);
12630 %}
12631 
12632 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU: same match (If on unsigned flags),
// but encoded with jccb and a fixed size(2); tagged with
// ins_short_branch(1).
12633 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
12634   match(If cop cmp);
12635   effect(USE labl);
12636 
12637   ins_cost(300);
12638   format %{ "j$cop,us  $labl" %}
12639   size(2);
12640   ins_encode %{
12641     Label* L = $labl$$label;
12642     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12643   %}
12644   ins_pipe(pipe_jcc);
12645   ins_short_branch(1);
12646 %}
12647 
// Short-offset replacement for jmpConUCF: same match (If with a cmpOpUCF
// condition on rFlagsRegUCF), but encoded with jccb and a fixed size(2);
// tagged with ins_short_branch(1).
12648 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
12649   match(If cop cmp);
12650   effect(USE labl);
12651 
12652   ins_cost(300);
12653   format %{ "j$cop,us  $labl" %}
12654   size(2);
12655   ins_encode %{
12656     Label* L = $labl$$label;
12657     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12658   %}
12659   ins_pipe(pipe_jcc);
12660   ins_short_branch(1);
12661 %}
12662 
// Short-offset replacement for jmpConUCF2. Same two-branch scheme as the
// long form (parity check plus equal/notEqual branch), but every branch is
// a jccb, so both cases emit exactly two short branches and the rule can
// declare size(4). Conditions other than equal/notEqual hit
// ShouldNotReachHere(). Tagged with ins_short_branch(1).
12663 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
12664   match(If cop cmp);
12665   effect(USE labl);
12666 
12667   ins_cost(300);
12668   format %{ $$template
12669     if ($cop$$cmpcode == Assembler::notEqual) {
12670       $$emit$$"jp,u,s  $labl\n\t"
12671       $$emit$$"j$cop,u,s  $labl"
12672     } else {
12673       $$emit$$"jp,u,s  done\n\t"
12674       $$emit$$"j$cop,u,s  $labl\n\t"
12675       $$emit$$"done:"
12676     }
12677   %}
12678   size(4);
12679   ins_encode %{
12680     Label* l = $labl$$label;
12681     if ($cop$$cmpcode == Assembler::notEqual) {
12682       __ jccb(Assembler::parity, *l);
12683       __ jccb(Assembler::notEqual, *l);
12684     } else if ($cop$$cmpcode == Assembler::equal) {
12685       Label done;
12686       __ jccb(Assembler::parity, done);
12687       __ jccb(Assembler::equal, *l);
12688       __ bind(done);
12689     } else {
12690        ShouldNotReachHere();
12691     }
12692   %}
12693   ins_pipe(pipe_jcc);
12694   ins_short_branch(1);
12695 %}
12696 
12697 // ============================================================================
12698 // inlined locking and unlocking
12699 
// Inlined monitor-enter for compilations that use RTM
// (Compile::current()->use_rtm()). Matches (FastLock object box) and
// produces its outcome in the flags register.
// Register constraints: box is pinned to rbx and is USE_KILL; tmp (rax),
// scr (rdx), cx1 and cx2 are TEMPs. The encoding forwards all of them to
// MacroAssembler::fast_lock together with r15_thread, the RTM counters, the
// current method's MethodData and the profile_rtm() setting.
// NOTE(review): the literal `true` argument presumably selects the RTM path
// (cmpFastLock passes false in the same position) -- confirm against
// fast_lock's signature.
12700 instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
12701   predicate(Compile::current()->use_rtm());
12702   match(Set cr (FastLock object box));
12703   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12704   ins_cost(300);
12705   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12706   ins_encode %{
12707     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12708                  $scr$$Register, $cx1$$Register, $cx2$$Register, r15_thread,
12709                  _rtm_counters, _stack_rtm_counters,
12710                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12711                  true, ra_->C->profile_rtm());
12712   %}
12713   ins_pipe(pipe_slow);
12714 %}
12715 
// Inlined monitor-enter for compilations that do not use RTM.
// Matches (FastLock object box) and produces its outcome in the flags
// register. box is pinned to rbx and is USE_KILL; tmp (rax) and scr are
// TEMPs. The same MacroAssembler::fast_lock helper is called as in
// cmpFastLockRTM, with noreg / nullptr / false in the RTM-related argument
// positions.
12716 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
12717   predicate(!Compile::current()->use_rtm());
12718   match(Set cr (FastLock object box));
12719   effect(TEMP tmp, TEMP scr, USE_KILL box);
12720   ins_cost(300);
12721   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
12722   ins_encode %{
12723     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12724                  $scr$$Register, noreg, noreg, r15_thread, nullptr, nullptr, nullptr, false, false);
12725   %}
12726   ins_pipe(pipe_slow);
12727 %}
12728 
// Inlined monitor-exit. Matches (FastUnlock object box) and produces its
// outcome in the flags register. box is pinned to rax and is USE_KILL; tmp
// is a TEMP. There is a single rule for both RTM and non-RTM compilations:
// the use_rtm() setting is passed to MacroAssembler::fast_unlock as an
// argument instead of being tested in a predicate.
12729 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
12730   match(Set cr (FastUnlock object box));
12731   effect(TEMP tmp, USE_KILL box);
12732   ins_cost(300);
12733   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
12734   ins_encode %{
12735     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12736   %}
12737   ins_pipe(pipe_slow);
12738 %}
12739 
12740 
12741 // ============================================================================
12742 // Safepoint Instructions
// Safepoint poll: matches (SafePoint poll) and emits `testl rax, [poll]`,
// a read of the address held in $poll. The flags are killed.
// The encoding first records a poll_type relocation, then emits the test,
// and in debug builds asserts that the instruction at the recorded pc is
// recognized by NativeInstruction::is_safepoint_poll(). The fixed size(4)
// additionally makes debug builds assert on the emitted length.
12743 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
12744 %{
12745   match(SafePoint poll);
12746   effect(KILL cr, USE poll);
12747 
12748   format %{ "testl   rax, [$poll]\t"
12749             "# Safepoint: poll for GC" %}
12750   ins_cost(125);
12751   size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
12752   ins_encode %{
12753     __ relocate(relocInfo::poll_type);
12754     address pre_pc = __ pc();
12755     __ testl(rax, Address($poll$$Register, 0));
12756     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
12757   %}
12758   ins_pipe(ialu_reg_mem);
12759 %}
12760 
// MaskAll from a long source into an opmask (kReg) register.
// Matches (MaskAll src) with src in a long register and delegates to
// vector_maskall_operation, passing the vector length of this node as the
// mask length.
12761 instruct mask_all_evexL(kReg dst, rRegL src) %{
12762   match(Set dst (MaskAll src));
12763   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
12764   ins_encode %{
12765     int mask_len = Matcher::vector_length(this);
12766     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
12767   %}
12768   ins_pipe( pipe_slow );
12769 %}
12770 
// MaskAll from an int source for vectors with more than 32 lanes.
// The int source is first copied into the long TEMP register with movslq
// (sign-extending, per the mnemonic), and that 64-bit value is then handed
// to vector_maskall_operation together with the vector length of this node.
12771 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
12772   predicate(Matcher::vector_length(n) > 32);
12773   match(Set dst (MaskAll src));
12774   effect(TEMP tmp);
12775   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
12776   ins_encode %{
12777     int mask_len = Matcher::vector_length(this);
12778     __ movslq($tmp$$Register, $src$$Register);
12779     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
12780   %}
12781   ins_pipe( pipe_slow );
12782 %}
12783 
12784 // ============================================================================
12785 // Procedure Call/Return Instructions
12786 // Call Java Static Instruction
12787 // Note: If this code changes, the corresponding ret_addr_offset() and
12788 //       compute_padding() functions will have to be adjusted.
// Direct call for the CallStaticJava node.
// The encoding is a chain of three encoding classes defined elsewhere in
// this file: clear_avx, Java_Static_Call(meth) and call_epilog. opcode(0xE8)
// supplies the call opcode noted in the inline comment, and
// ins_alignment(4) requests 4-byte alignment for the instruction.
12789 instruct CallStaticJavaDirect(method meth) %{
12790   match(CallStaticJava);
12791   effect(USE meth);
12792 
12793   ins_cost(300);
12794   format %{ "call,static " %}
12795   opcode(0xE8); /* E8 cd */
12796   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
12797   ins_pipe(pipe_slow);
12798   ins_alignment(4);
12799 %}
12800 
12801 // Call Java Dynamic Instruction
12802 // Note: If this code changes, the corresponding ret_addr_offset() and
12803 //       compute_padding() functions will have to be adjusted.
// Direct call for the CallDynamicJava node.
// Per the format string, rax is loaded with Universe::non_oop_word() before
// the call. The encoding chains clear_avx, Java_Dynamic_Call(meth) and
// call_epilog (encoding classes defined elsewhere in this file);
// ins_alignment(4) requests 4-byte alignment.
12804 instruct CallDynamicJavaDirect(method meth)
12805 %{
12806   match(CallDynamicJava);
12807   effect(USE meth);
12808 
12809   ins_cost(300);
12810   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
12811             "call,dynamic " %}
12812   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
12813   ins_pipe(pipe_slow);
12814   ins_alignment(4);
12815 %}
12816 
12817 // Call Runtime Instruction
// Direct call for the CallRuntime node.
// Encoded as clear_avx followed by Java_To_Runtime(meth); unlike the Java
// call rules there is no call_epilog and no alignment request.
12818 instruct CallRuntimeDirect(method meth)
12819 %{
12820   match(CallRuntime);
12821   effect(USE meth);
12822 
12823   ins_cost(300);
12824   format %{ "call,runtime " %}
12825   ins_encode(clear_avx, Java_To_Runtime(meth));
12826   ins_pipe(pipe_slow);
12827 %}
12828 
12829 // Call runtime without safepoint
// Direct call for the CallLeaf node (runtime call without a safepoint).
// Uses the same encoding as CallRuntimeDirect: clear_avx followed by
// Java_To_Runtime(meth).
12830 instruct CallLeafDirect(method meth)
12831 %{
12832   match(CallLeaf);
12833   effect(USE meth);
12834 
12835   ins_cost(300);
12836   format %{ "call_leaf,runtime " %}
12837   ins_encode(clear_avx, Java_To_Runtime(meth));
12838   ins_pipe(pipe_slow);
12839 %}
12840 
12841 // Call runtime without safepoint and with vector arguments
// Direct call for the CallLeafVector node (leaf call with vector arguments).
// Encoded with Java_To_Runtime(meth) only; unlike CallLeafDirect the
// clear_avx step is omitted.
// NOTE(review): presumably clear_avx is left out so the vector register
// arguments are not disturbed before the call -- confirm.
12842 instruct CallLeafDirectVector(method meth)
12843 %{
12844   match(CallLeafVector);
12845   effect(USE meth);
12846 
12847   ins_cost(300);
12848   format %{ "call_leaf,vector " %}
12849   ins_encode(Java_To_Runtime(meth));
12850   ins_pipe(pipe_slow);
12851 %}
12852 
12853 // Call runtime without safepoint (CallLeafNoFP), indirect form:
12854 // entry point is null, target holds the address to call
12855 instruct CallLeafNoFPInDirect(rRegP target)
12856 %{
12857   predicate(n->as_Call()->entry_point() == nullptr); // only when the call node carries no entry point
12858   match(CallLeafNoFP target);
12859 
12860   ins_cost(300);
12861   format %{ "call_leaf_nofp,runtime indirect " %}
12862   ins_encode %{
12863      __ call($target$$Register); // call through the register allocated for target
12864   %}
12865 
12866   ins_pipe(pipe_slow);
12867 %}
12868 
12869 instruct CallLeafNoFPDirect(method meth)
12870 %{
12871   predicate(n->as_Call()->entry_point() != nullptr); // direct form: complement of CallLeafNoFPInDirect's predicate
12872   match(CallLeafNoFP);
12873   effect(USE meth);
12874 
12875   ins_cost(300);
12876   format %{ "call_leaf_nofp,runtime " %}
12877   ins_encode(clear_avx, Java_To_Runtime(meth)); // same encoding list as CallRuntimeDirect and CallLeafDirect
12878   ins_pipe(pipe_slow);
12879 %}
12880 
12881 // Return Instruction: matches the ideal Return node.
12882 // Remove the return address & jump to it.
12883 // NOTE(review): the encoding below emits only ret(0); no nop is generated
12884 // after it here, so the earlier "nop after ret for safepoint patching" remark looks stale.
12885 instruct Ret()
12886 %{
12887   match(Return);
12888 
12889   format %{ "ret" %}
12890   ins_encode %{
12891     __ ret(0); // argument 0: presumably the immediate byte count to pop - confirm against the assembler
12892   %}
12893   ins_pipe(pipe_jmp);
12894 %}
12895 
12896 // Tail Call; Jump from runtime stub to Java code.
12897 // Also known as an 'interprocedural jump'.
12898 // Target of jump will eventually return to caller.
12899 // TailJump below removes the return address.
12900 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
12901 %{
12902   match(TailCall jump_target method_ptr); // method_ptr is only constrained by its operand class; the encoding never references it
12903 
12904   ins_cost(300);
12905   format %{ "jmp     $jump_target\t# rbx holds method" %}
12906   ins_encode %{
12907     __ jmp($jump_target$$Register); // register-indirect jump; nothing is popped first
12908   %}
12909   ins_pipe(pipe_jmp);
12910 %}
12911 
12912 // Tail Jump; remove the return address; jump to target.
12913 // TailCall above leaves the return address around.
12914 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
12915 %{
12916   match(TailJump jump_target ex_oop); // ex_oop is only constrained by its operand class; the encoding never references it
12917 
12918   ins_cost(300);
12919   format %{ "popq    rdx\t# pop return address\n\t"
12920             "jmp     $jump_target" %}
12921   ins_encode %{
12922     __ popq(as_Register(RDX_enc)); // NOTE(review): rdx is overwritten but no effect() clause declares it - confirm jump_target can never be rdx
12923     __ jmp($jump_target$$Register); // register-indirect jump after the pop
12924   %}
12925   ins_pipe(pipe_jmp);
12926 %}
12927 
12928 // Create exception oop: created by stack-crawling runtime code.
12929 // Created exception is now available to this handler, and is setup
12930 // just prior to jumping to this handler.  No code emitted.
12931 instruct CreateException(rax_RegP ex_oop)
12932 %{
12933   match(Set ex_oop (CreateEx));
12934 
12935   size(0);
12936   // size(0) plus the empty ins_encode() below: the format string only describes the node
12937   format %{ "# exception oop is in rax; no code emitted" %}
12938   ins_encode();
12939   ins_pipe(empty);
12940 %}
12941 
12942 // Rethrow exception: matches the ideal Rethrow node.
12943 // The exception oop will come in the first argument position.
12944 // Then JUMP (not call) to the rethrow stub code.
12945 instruct RethrowException()
12946 %{
12947   match(Rethrow);
12948 
12949   // the format names the stub symbolically; the address comes from OptoRuntime::rethrow_stub()
12950   format %{ "jmp     rethrow_stub" %}
12951   ins_encode %{
12952     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg); // noreg: presumably "no scratch register" - confirm against MacroAssembler::jump
12953   %}
12954   ins_pipe(pipe_jmp);
12955 %}
12956 
12957 // ============================================================================
12958 // This name is KNOWN by the ADLC and cannot be changed.
12959 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12960 // for this instruction, which matches the ideal ThreadLocal node.
12961 instruct tlsLoadP(r15_RegP dst) %{
12962   match(Set dst (ThreadLocal));
12963   effect(DEF dst);
12964 
12965   size(0); // no bytes emitted; the encoding below is empty
12966   format %{ "# TLS is in R15" %}
12967   ins_encode( /*empty encoding*/ );
12968   ins_pipe(ialu_reg_reg);
12969 %}
12970 
12971 
12972 //----------PEEPHOLE RULES-----------------------------------------------------
12973 // These must follow all instruction definitions as they use the names
12974 // defined in the instructions definitions.
12975 //
12976 // peeppredicate ( rule_predicate );
12977 // // the predicate that must hold; otherwise the peephole rule is ignored
12978 //
12979 // peepmatch ( root_instr_name [preceding_instruction]* );
12980 //
12981 // peepprocedure ( procedure_name );
12982 // // provide the name of a procedure that performs the optimization. The
12983 // // procedure should reside in the architecture-dependent peephole file and has
12984 // // the signature MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...).
12985 // // Its arguments are the basic block, the index of the current node inside the
12986 // // block, the register allocator, a function that, when invoked, returns a new
12987 // // node of the kind named in peepreplace, and the rules of the nodes appearing
12988 // // in the corresponding peepmatch. The function returns true if successful and
12989 // // false otherwise.
12990 //
12991 // peepconstraint %{
12992 // (instruction_number.operand_name relational_op instruction_number.operand_name
12993 //  [, ...] );
12994 // // instruction numbers are zero-based using left to right order in peepmatch
12995 //
12996 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12997 // // provide an instruction_number.operand_name for each operand that appears
12998 // // in the replacement instruction's match rule
12999 //
13000 // ---------VM FLAGS---------------------------------------------------------
13001 //
13002 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13003 //
13004 // Each peephole rule is given an identifying number starting with zero and
13005 // increasing by one in the order seen by the parser.  An individual peephole
13006 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13007 // on the command-line.
13008 //
13009 // ---------CURRENT LIMITATIONS----------------------------------------------
13010 //
13011 // Only transformations inside a basic block (do we need more for peephole?)
13012 //
13013 // ---------EXAMPLE----------------------------------------------------------
13014 //
13015 // // pertinent parts of existing instructions in architecture description
13016 // instruct movI(rRegI dst, rRegI src)
13017 // %{
13018 //   match(Set dst (CopyI src));
13019 // %}
13020 //
13021 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
13022 // %{
13023 //   match(Set dst (AddI dst src));
13024 //   effect(KILL cr);
13025 // %}
13026 //
13027 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
13028 // %{
13029 //   match(Set dst (AddI dst src));
13030 // %}
13031 //
13032 // 1. Simple replacement
13033 // - Only match adjacent instructions in same basic block
13034 // - Only equality constraints
13035 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
13036 // - Only one replacement instruction
13037 //
13038 // // Change (inc mov) to lea
13039 // peephole %{
13040 //   // lea should only be emitted when beneficial
13041 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13042 //   // increment preceded by register-register move
13043 //   peepmatch ( incI_rReg movI );
13044 //   // require that the destination register of the increment
13045 //   // match the destination register of the move
13046 //   peepconstraint ( 0.dst == 1.dst );
13047 //   // construct a replacement instruction that sets
13048 //   // the destination to ( move's source register + one )
13049 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13050 // %}
13051 //
13052 // 2. Procedural replacement
13053 // - More flexible when finding relevant nodes
13054 // - More flexible constraints
13055 // - More flexible transformations
13056 // - May utilise architecture-dependent API more effectively
13057 // - Currently only one replacement instruction due to adlc parsing capabilities
13058 //
13059 // // Change (inc mov) to lea
13060 // peephole %{
13061 //   // lea should only be emitted when beneficial
13062 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
13063 //   // the rule numbers of these nodes inside are passed into the function below
13064 //   peepmatch ( incI_rReg movI );
13065 //   // the method that takes the responsibility of transformation
13066 //   peepprocedure ( inc_mov_to_lea );
13067 //   // the replacement is a leaI_rReg_immI; a lambda that creates this node
13068 //   // when invoked is passed into the function above
13069 //   peepreplace ( leaI_rReg_immI() );
13070 // %}
13071 
13072 // These instructions are not matched by the matcher but are used by the peephole rules
13073 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
13074 %{
13075   predicate(false); // never selected by the matcher; only named by a peepreplace
13076   match(Set dst (AddI src1 src2));
13077   format %{ "leal    $dst, [$src1 + $src2]" %}
13078   ins_encode %{
13079     Register src1 = $src1$$Register;
13080     Register src2 = $src2$$Register;
13081     if (src1 == rbp || src1 == r13) {
13082       // Keep rbp/r13 out of the base slot: with scale 1 base and index can be swapped.
13083       assert(src2 != rbp && src2 != r13, "");
13084       __ leal($dst$$Register, Address(src2, src1, Address::times_1));
13085     } else {
13086       __ leal($dst$$Register, Address(src1, src2, Address::times_1));
13087     }
13088   %}
13089   ins_pipe(ialu_reg_reg);
13090 %}
13091 
13092 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
13093 %{
13094   predicate(false); // never selected by the matcher; only named by a peepreplace
13095   match(Set dst (AddI src1 src2));
13096   format %{ "leal    $dst, [$src1 + $src2]" %}
13097   ins_encode %{
13098     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant)); // 32-bit lea of base register plus constant
13099   %}
13100   ins_pipe(ialu_reg_reg);
13101 %}
13102 
13103 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
13104 %{
13105   predicate(false); // never selected by the matcher; only named by a peepreplace
13106   match(Set dst (LShiftI src shift));
13107   format %{ "leal    $dst, [$src << $shift]" %}
13108   ins_encode %{
13109     Register src = $src$$Register;
13110     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
13111     if (scale != Address::times_2 || src == rbp || src == r13) {
13112       __ leal($dst$$Register, Address(noreg, src, scale)); // general case: scaled index, no base
13113     } else {
13114       __ leal($dst$$Register, Address(src, src, Address::times_1)); // doubling written as src + src
13115     }
13116   %}
13117   ins_pipe(ialu_reg_reg);
13118 %}
13119 
13120 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
13121 %{
13122   predicate(false); // never selected by the matcher; only named by a peepreplace
13123   match(Set dst (AddL src1 src2));
13124   format %{ "leaq    $dst, [$src1 + $src2]" %}
13125   ins_encode %{
13126     Register src1 = $src1$$Register;
13127     Register src2 = $src2$$Register;
13128     if (src1 == rbp || src1 == r13) {
13129       // Keep rbp/r13 out of the base slot: with scale 1 base and index can be swapped.
13130       assert(src2 != rbp && src2 != r13, "");
13131       __ leaq($dst$$Register, Address(src2, src1, Address::times_1));
13132     } else {
13133       __ leaq($dst$$Register, Address(src1, src2, Address::times_1));
13134     }
13135   %}
13136   ins_pipe(ialu_reg_reg);
13137 %}
13138 
13139 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
13140 %{
13141   predicate(false); // never selected by the matcher; only named by a peepreplace
13142   match(Set dst (AddL src1 src2));
13143   format %{ "leaq    $dst, [$src1 + $src2]" %}
13144   ins_encode %{
13145     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant)); // 64-bit lea of base register plus constant
13146   %}
13147   ins_pipe(ialu_reg_reg);
13148 %}
13149 
13150 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
13151 %{
13152   predicate(false); // never selected by the matcher; only named by a peepreplace
13153   match(Set dst (LShiftL src shift));
13154   format %{ "leaq    $dst, [$src << $shift]" %}
13155   ins_encode %{
13156     Register src = $src$$Register;
13157     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
13158     if (scale != Address::times_2 || src == rbp || src == r13) {
13159       __ leaq($dst$$Register, Address(noreg, src, scale)); // general case: scaled index, no base
13160     } else {
13161       __ leaq($dst$$Register, Address(src, src, Address::times_1)); // doubling written as src + src
13162     }
13163   %}
13164   ins_pipe(ialu_reg_reg);
13165 %}
13166 
13167 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
13168 // sal}) with lea instructions. The {add, sal} rules are beneficial in
13169 // processors with at least partial ALU support for lea
13170 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
13171 // beneficial for processors with full ALU support
13172 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
13173 
13174 peephole
13175 %{
13176   peeppredicate(VM_Version::supports_fast_2op_lea()); // rule is ignored unless this holds
13177   peepmatch (addI_rReg); // root instruction of the rule (int add)
13178   peepprocedure (lea_coalesce_reg); // procedure that performs the transformation
13179   peepreplace (leaI_rReg_rReg_peep()); // replacement: int lea of two registers
13180 %}
13181 
13182 peephole
13183 %{
13184   peeppredicate(VM_Version::supports_fast_2op_lea()); // rule is ignored unless this holds
13185   peepmatch (addI_rReg_imm); // root instruction of the rule (int add)
13186   peepprocedure (lea_coalesce_imm); // procedure that performs the transformation
13187   peepreplace (leaI_rReg_immI_peep()); // replacement: int lea of register plus constant
13188 %}
13189 
13190 peephole
13191 %{
13192   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13193                 VM_Version::is_intel_cascade_lake()); // stricter gate used by the {inc, dec} rules
13194   peepmatch (incI_rReg); // root instruction of the rule (int inc)
13195   peepprocedure (lea_coalesce_imm); // procedure that performs the transformation
13196   peepreplace (leaI_rReg_immI_peep()); // replacement: int lea of register plus constant
13197 %}
13198 
13199 peephole
13200 %{
13201   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13202                 VM_Version::is_intel_cascade_lake()); // stricter gate used by the {inc, dec} rules
13203   peepmatch (decI_rReg); // root instruction of the rule (int dec)
13204   peepprocedure (lea_coalesce_imm); // procedure that performs the transformation
13205   peepreplace (leaI_rReg_immI_peep()); // replacement: int lea of register plus constant
13206 %}
13207 
13208 peephole
13209 %{
13210   peeppredicate(VM_Version::supports_fast_2op_lea()); // rule is ignored unless this holds
13211   peepmatch (salI_rReg_immI2); // root instruction of the rule (int sal)
13212   peepprocedure (lea_coalesce_imm); // procedure that performs the transformation
13213   peepreplace (leaI_rReg_immI2_peep()); // replacement: int lea computing the left shift
13214 %}
13215 
13216 peephole
13217 %{
13218   peeppredicate(VM_Version::supports_fast_2op_lea()); // rule is ignored unless this holds
13219   peepmatch (addL_rReg); // root instruction of the rule (long add)
13220   peepprocedure (lea_coalesce_reg); // procedure that performs the transformation
13221   peepreplace (leaL_rReg_rReg_peep()); // replacement: long lea of two registers
13222 %}
13223 
13224 peephole
13225 %{
13226   peeppredicate(VM_Version::supports_fast_2op_lea()); // rule is ignored unless this holds
13227   peepmatch (addL_rReg_imm); // root instruction of the rule (long add)
13228   peepprocedure (lea_coalesce_imm); // procedure that performs the transformation
13229   peepreplace (leaL_rReg_immL32_peep()); // replacement: long lea of register plus constant
13230 %}
13231 
13232 peephole
13233 %{
13234   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13235                 VM_Version::is_intel_cascade_lake()); // stricter gate used by the {inc, dec} rules
13236   peepmatch (incL_rReg); // root instruction of the rule (long inc)
13237   peepprocedure (lea_coalesce_imm); // procedure that performs the transformation
13238   peepreplace (leaL_rReg_immL32_peep()); // replacement: long lea of register plus constant
13239 %}
13240 
13241 peephole
13242 %{
13243   peeppredicate(VM_Version::supports_fast_3op_lea() ||
13244                 VM_Version::is_intel_cascade_lake()); // stricter gate used by the {inc, dec} rules
13245   peepmatch (decL_rReg); // root instruction of the rule (long dec)
13246   peepprocedure (lea_coalesce_imm); // procedure that performs the transformation
13247   peepreplace (leaL_rReg_immL32_peep()); // replacement: long lea of register plus constant
13248 %}
13249 
13250 peephole
13251 %{
13252   peeppredicate(VM_Version::supports_fast_2op_lea()); // rule is ignored unless this holds
13253   peepmatch (salL_rReg_immI2); // root instruction of the rule (long sal)
13254   peepprocedure (lea_coalesce_imm); // procedure that performs the transformation
13255   peepreplace (leaL_rReg_immI2_peep()); // replacement: long lea computing the left shift
13256 %}
13257 
13258 // These peephole rules match instructions which set flags and are followed by a testI/L_reg.
13259 // The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
13260 
13261 // int variant: the root instruction is testI_reg
13262 peephole
13263 %{
13264   peepmatch (testI_reg); // no peeppredicate is given for this rule
13265   peepprocedure (test_may_remove); // no peepreplace is given for this rule
13266 %}
13267 
13268 // long variant: the root instruction is testL_reg
13269 peephole
13270 %{
13271   peepmatch (testL_reg); // no peeppredicate is given for this rule
13272   peepprocedure (test_may_remove); // no peepreplace is given for this rule
13273 %}
13274 
13275 
13276 //----------SMARTSPILL RULES---------------------------------------------------
13277 // These must follow all instruction definitions as they use the names
13278 // defined in the instructions definitions.