1 //
    2 // Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding, VM register );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
// Every general register is defined twice: NAME is bound to reg->as_VMReg()
// and NAME_H to reg->as_VMReg()->next().  Both entries of a pair carry the
// same save types, ideal register type and encoding.
reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

// RBX is save-on-call for Java code but save-on-entry in the C convention.
reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// The stack pointer is never saved by the allocator in either convention.
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

// RSI and RDI are save-on-entry in the C convention only when _WIN64 is
// defined; otherwise they are save-on-call in both conventions.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

// R8-R11: save-on-call in both conventions.
reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15: save-on-call for Java code, save-on-entry in the C convention.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 
  132 // Floating Point Registers
  133 
  134 // Specify priority of register selection within phases of register
  135 // allocation.  Highest priority is first.  A useful heuristic is to
  136 // give registers a low priority when they are required by machine
  137 // instructions, like EAX and EDX on I486, and choose no-save registers
  138 // before save-on-call, & save-on-call before save-on-entry.  Registers
  139 // which participate in fixed calling sequences should come last.
  140 // Registers which are used as pairs must fall on an even boundary.
  141 
// Allocation order of the general registers, highest priority first.  Each
// register is listed together with its _H companion so that pairs stay
// adjacent; RSP comes last.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);
  158 
  159 
  160 //----------Architecture Description Register Classes--------------------------
  161 // Several register classes are automatically defined based upon information in
  162 // this architecture description.
  163 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  164 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  165 //
  166 
// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers: every general register together with
// its _H companion, including RSP and R15.  adlc generates _ALL_REG_mask from
// this class; reg_mask_init() derives the pointer and long masks from it.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);
  187 
  188 // Class for all int registers
  189 reg_class all_int_reg(RAX
  190                       RDX,
  191                       RBP,
  192                       RDI,
  193                       RSI,
  194                       RCX,
  195                       RBX,
  196                       R8,
  197                       R9,
  198                       R10,
  199                       R11,
  200                       R12,
  201                       R13,
  202                       R14);
  203 
// Class for all pointer registers.  The mask is computed by reg_mask_init():
// it is all_reg minus RBP when PreserveFramePointer is set and minus R12 when
// need_r12_heapbase() is true.
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (any_reg excluding RSP and R15)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (ptr_reg additionally excluding RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (ptr_reg additionally excluding RAX)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (ptr_reg additionally excluding RAX and RBX)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}
  228 
// Class for all long registers.  reg_mask_init() makes this mask a copy of
// the ptr_reg mask, so RSP and R15 are excluded, as are RBP and R12 whenever
// they are excluded from any_reg.
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (long_reg additionally excluding RAX and RDX)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (long_reg additionally excluding RCX)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (long_reg additionally excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}
  248 
// Class for all int registers.  reg_mask_init() starts from all_int_reg
// (which has neither RSP nor R15) and removes RBP when PreserveFramePointer
// is set and R12 when need_r12_heapbase() is true.
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (int_reg additionally excluding RAX and RDX)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (int_reg additionally excluding RCX)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (int_reg additionally excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
  268 
// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);
  313 
  314 // Singleton class for instruction pointer
  315 // reg_class ip_reg(RIP);
  316 
  317 %}
  318 
  319 //----------SOURCE BLOCK-------------------------------------------------------
  320 // This is a block of C++ code which provides values, functions, and
  321 // definitions necessary in the rest of the architecture description
  322 source_hpp %{
  323 
// Register masks backing the register classes whose bodies return a mask
// (any_reg, ptr_reg, long_reg, int_reg and their variants).  They are defined
// in the source block and filled in by reg_mask_init().
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// The pointer, long and int masks OR-ed with STACK_OR_STACK_SLOTS_mask().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;
  342 
  343 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
  344 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
  345 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
  346 
  347 %}
  348 
  349 source %{
// Short names for the Assembler operand formats; RELOC_DISP32 is what the
// emit_d32_reloc(cbuf, addr) helper passes as the relocation 'format'.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Shorthand: "__ insn(...)" expands to "_masm.insn(...)", so any function
// using it needs a MacroAssembler named _masm in scope.
#define __ _masm.
  354 
// Storage for the masks declared in source_hpp.  They are default-constructed
// here and given their real contents by reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
  372 
  373 static bool need_r12_heapbase() {
  374   return UseCompressedOops;
  375 }
  376 
  377 void reg_mask_init() {
  378   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
  379   // We derive a number of subsets from it.
  380   _ANY_REG_mask = _ALL_REG_mask;
  381 
  382   if (PreserveFramePointer) {
  383     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  384     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  385   }
  386   if (need_r12_heapbase()) {
  387     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  388     _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
  389   }
  390 
  391   _PTR_REG_mask = _ANY_REG_mask;
  392   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
  393   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
  394   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
  395   _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
  396 
  397   _STACK_OR_PTR_REG_mask = _PTR_REG_mask;
  398   _STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  399 
  400   _PTR_REG_NO_RBP_mask = _PTR_REG_mask;
  401   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  402   _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  403 
  404   _PTR_NO_RAX_REG_mask = _PTR_REG_mask;
  405   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  406   _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  407 
  408   _PTR_NO_RAX_RBX_REG_mask = _PTR_NO_RAX_REG_mask;
  409   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  410   _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
  411 
  412   _LONG_REG_mask = _PTR_REG_mask;
  413   _STACK_OR_LONG_REG_mask = _LONG_REG_mask;
  414   _STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  415 
  416   _LONG_NO_RAX_RDX_REG_mask = _LONG_REG_mask;
  417   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  418   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  419   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  420   _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
  421 
  422   _LONG_NO_RCX_REG_mask = _LONG_REG_mask;
  423   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  424   _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
  425 
  426   _LONG_NO_RBP_R13_REG_mask = _LONG_REG_mask;
  427   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  428   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  429   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  430   _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
  431 
  432   _INT_REG_mask = _ALL_INT_REG_mask;
  433   if (PreserveFramePointer) {
  434     _INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  435   }
  436   if (need_r12_heapbase()) {
  437     _INT_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  438   }
  439 
  440   _STACK_OR_INT_REG_mask = _INT_REG_mask;
  441   _STACK_OR_INT_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());
  442 
  443   _INT_NO_RAX_RDX_REG_mask = _INT_REG_mask;
  444   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  445   _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  446 
  447   _INT_NO_RCX_REG_mask = _INT_REG_mask;
  448   _INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  449 
  450   _INT_NO_RBP_R13_REG_mask = _INT_REG_mask;
  451   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  452   _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  453 
  454   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  455   // from the float_reg_legacy/float_reg_evex register class.
  456   _FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
  457 }
  458 
  459 static bool generate_vzeroupper(Compile* C) {
  460   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
  461 }
  462 
  463 static int clear_avx_size() {
  464   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
  465 }
  466 
  467 // !!!!! Special hack to get all types of calls to specify the byte offset
  468 //       from the start of the call to the point where the return address
  469 //       will point.
  470 int MachCallStaticJavaNode::ret_addr_offset()
  471 {
  472   int offset = 5; // 5 bytes from start of call to where return address points
  473   offset += clear_avx_size();
  474   return offset;
  475 }
  476 
  477 int MachCallDynamicJavaNode::ret_addr_offset()
  478 {
  479   int offset = 15; // 15 bytes from start of call to where return address points
  480   offset += clear_avx_size();
  481   return offset;
  482 }
  483 
  484 int MachCallRuntimeNode::ret_addr_offset() {
  485   if (_entry_point == NULL) {
  486     // CallLeafNoFPInDirect
  487     return 3; // callq (register)
  488   }
  489   int offset = 13; // movq r10,#addr; callq (r10)
  490   if (this->ideal_Opcode() != Op_CallLeafVector) {
  491     offset += clear_avx_size();
  492   }
  493   return offset;
  494 }
  495 
  496 //
  497 // Compute padding required for nodes which need alignment
  498 //
  499 
  500 // The address of the call instruction needs to be 4-byte aligned to
  501 // ensure that it does not span a cache line so that it can be patched.
  502 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
  503 {
  504   current_offset += clear_avx_size(); // skip vzeroupper
  505   current_offset += 1; // skip call opcode byte
  506   return align_up(current_offset, alignment_required()) - current_offset;
  507 }
  508 
  509 // The address of the call instruction needs to be 4-byte aligned to
  510 // ensure that it does not span a cache line so that it can be patched.
  511 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
  512 {
  513   current_offset += clear_avx_size(); // skip vzeroupper
  514   current_offset += 11; // skip movq instruction + call opcode byte
  515   return align_up(current_offset, alignment_required()) - current_offset;
  516 }
  517 
  518 // EMIT_RM()
  519 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  520   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
  521   cbuf.insts()->emit_int8(c);
  522 }
  523 
  524 // EMIT_CC()
  525 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  526   unsigned char c = (unsigned char) (f1 | f2);
  527   cbuf.insts()->emit_int8(c);
  528 }
  529 
  530 // EMIT_OPCODE()
  531 void emit_opcode(CodeBuffer &cbuf, int code) {
  532   cbuf.insts()->emit_int8((unsigned char) code);
  533 }
  534 
  535 // EMIT_OPCODE() w/ relocation information
  536 void emit_opcode(CodeBuffer &cbuf,
  537                  int code, relocInfo::relocType reloc, int offset, int format)
  538 {
  539   cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
  540   emit_opcode(cbuf, code);
  541 }
  542 
  543 // EMIT_D8()
  544 void emit_d8(CodeBuffer &cbuf, int d8) {
  545   cbuf.insts()->emit_int8((unsigned char) d8);
  546 }
  547 
  548 // EMIT_D16()
  549 void emit_d16(CodeBuffer &cbuf, int d16) {
  550   cbuf.insts()->emit_int16(d16);
  551 }
  552 
  553 // EMIT_D32()
  554 void emit_d32(CodeBuffer &cbuf, int d32) {
  555   cbuf.insts()->emit_int32(d32);
  556 }
  557 
  558 // EMIT_D64()
  559 void emit_d64(CodeBuffer &cbuf, int64_t d64) {
  560   cbuf.insts()->emit_int64(d64);
  561 }
  562 
  563 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  564 void emit_d32_reloc(CodeBuffer& cbuf,
  565                     int d32,
  566                     relocInfo::relocType reloc,
  567                     int format)
  568 {
  569   assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
  570   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  571   cbuf.insts()->emit_int32(d32);
  572 }
  573 
  574 // emit 32 bit value and construct relocation entry from RelocationHolder
  575 void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
  576 #ifdef ASSERT
  577   if (rspec.reloc()->type() == relocInfo::oop_type &&
  578       d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
  579     assert(Universe::heap()->is_in((address)(intptr_t)d32), "should be real oop");
  580     assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)), "cannot embed broken oops in code");
  581   }
  582 #endif
  583   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  584   cbuf.insts()->emit_int32(d32);
  585 }
  586 
  587 void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
  588   address next_ip = cbuf.insts_end() + 4;
  589   emit_d32_reloc(cbuf, (int) (addr - next_ip),
  590                  external_word_Relocation::spec(addr),
  591                  RELOC_DISP32);
  592 }
  593 
  594 
  595 // emit 64 bit value and construct relocation entry from relocInfo::relocType
  596 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
  597   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  598   cbuf.insts()->emit_int64(d64);
  599 }
  600 
  601 // emit 64 bit value and construct relocation entry from RelocationHolder
  602 void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
  603 #ifdef ASSERT
  604   if (rspec.reloc()->type() == relocInfo::oop_type &&
  605       d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
  606     assert(Universe::heap()->is_in((address)d64), "should be real oop");
  607     assert(oopDesc::is_oop(cast_to_oop(d64)), "cannot embed broken oops in code");
  608   }
  609 #endif
  610   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  611   cbuf.insts()->emit_int64(d64);
  612 }
  613 
  614 // Access stack slot for load or store
  615 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
  616 {
  617   emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
  618   if (-0x80 <= disp && disp < 0x80) {
  619     emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
  620     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  621     emit_d8(cbuf, disp);     // Displacement  // R/M byte
  622   } else {
  623     emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
  624     emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
  625     emit_d32(cbuf, disp);     // Displacement // R/M byte
  626   }
  627 }
  628 
  629    // rRegI ereg, memory mem) %{    // emit_reg_mem
  630 void encode_RegMem(CodeBuffer &cbuf,
  631                    int reg,
  632                    int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
  633 {
  634   assert(disp_reloc == relocInfo::none, "cannot have disp");
  635   int regenc = reg & 7;
  636   int baseenc = base & 7;
  637   int indexenc = index & 7;
  638 
  639   // There is no index & no scale, use form without SIB byte
  640   if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
  641     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  642     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  643       emit_rm(cbuf, 0x0, regenc, baseenc); // *
  644     } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  645       // If 8-bit displacement, mode 0x1
  646       emit_rm(cbuf, 0x1, regenc, baseenc); // *
  647       emit_d8(cbuf, disp);
  648     } else {
  649       // If 32-bit displacement
  650       if (base == -1) { // Special flag for absolute address
  651         emit_rm(cbuf, 0x0, regenc, 0x5); // *
  652         if (disp_reloc != relocInfo::none) {
  653           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  654         } else {
  655           emit_d32(cbuf, disp);
  656         }
  657       } else {
  658         // Normal base + offset
  659         emit_rm(cbuf, 0x2, regenc, baseenc); // *
  660         if (disp_reloc != relocInfo::none) {
  661           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  662         } else {
  663           emit_d32(cbuf, disp);
  664         }
  665       }
  666     }
  667   } else {
  668     // Else, encode with the SIB byte
  669     // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
  670     if (disp == 0 && base != RBP_enc && base != R13_enc) {
  671       // If no displacement
  672       emit_rm(cbuf, 0x0, regenc, 0x4); // *
  673       emit_rm(cbuf, scale, indexenc, baseenc);
  674     } else {
  675       if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
  676         // If 8-bit displacement, mode 0x1
  677         emit_rm(cbuf, 0x1, regenc, 0x4); // *
  678         emit_rm(cbuf, scale, indexenc, baseenc);
  679         emit_d8(cbuf, disp);
  680       } else {
  681         // If 32-bit displacement
  682         if (base == 0x04 ) {
  683           emit_rm(cbuf, 0x2, regenc, 0x4);
  684           emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
  685         } else {
  686           emit_rm(cbuf, 0x2, regenc, 0x4);
  687           emit_rm(cbuf, scale, indexenc, baseenc); // *
  688         }
  689         if (disp_reloc != relocInfo::none) {
  690           emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
  691         } else {
  692           emit_d32(cbuf, disp);
  693         }
  694       }
  695     }
  696   }
  697 }
  698 
  699 // This could be in MacroAssembler but it's fairly C2 specific
  700 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  701   Label exit;
  702   __ jccb(Assembler::noParity, exit);
  703   __ pushf();
  704   //
  705   // comiss/ucomiss instructions set ZF,PF,CF flags and
  706   // zero OF,AF,SF for NaN values.
  707   // Fixup flags by zeroing ZF,PF so that compare of NaN
  708   // values returns 'less than' result (CF is set).
  709   // Leave the rest of flags unchanged.
  710   //
  711   //    7 6 5 4 3 2 1 0
  712   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  713   //    0 0 1 0 1 0 1 1   (0x2B)
  714   //
  715   __ andq(Address(rsp, 0), 0xffffff2b);
  716   __ popf();
  717   __ bind(exit);
  718 }
  719 
  720 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  721   Label done;
  722   __ movl(dst, -1);
  723   __ jcc(Assembler::parity, done);
  724   __ jcc(Assembler::below, done);
  725   __ setb(Assembler::notEqual, dst);
  726   __ movzbl(dst, dst);
  727   __ bind(done);
  728 }
  729 
  730 // Math.min()    # Math.max()
  731 // --------------------------
  732 // ucomis[s/d]   #
  733 // ja   -> b     # a
  734 // jp   -> NaN   # NaN
  735 // jb   -> a     # b
  736 // je            #
  737 // |-jz -> a | b # a & b
  738 // |    -> a     #
  739 void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
  740                      XMMRegister a, XMMRegister b,
  741                      XMMRegister xmmt, Register rt,
  742                      bool min, bool single) {
  743 
  744   Label nan, zero, below, above, done;
  745 
  746   if (single)
  747     __ ucomiss(a, b);
  748   else
  749     __ ucomisd(a, b);
  750 
  751   if (dst->encoding() != (min ? b : a)->encoding())
  752     __ jccb(Assembler::above, above); // CF=0 & ZF=0
  753   else
  754     __ jccb(Assembler::above, done);
  755 
  756   __ jccb(Assembler::parity, nan);  // PF=1
  757   __ jccb(Assembler::below, below); // CF=1
  758 
  759   // equal
  760   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  761   if (single) {
  762     __ ucomiss(a, xmmt);
  763     __ jccb(Assembler::equal, zero);
  764 
  765     __ movflt(dst, a);
  766     __ jmp(done);
  767   }
  768   else {
  769     __ ucomisd(a, xmmt);
  770     __ jccb(Assembler::equal, zero);
  771 
  772     __ movdbl(dst, a);
  773     __ jmp(done);
  774   }
  775 
  776   __ bind(zero);
  777   if (min)
  778     __ vpor(dst, a, b, Assembler::AVX_128bit);
  779   else
  780     __ vpand(dst, a, b, Assembler::AVX_128bit);
  781 
  782   __ jmp(done);
  783 
  784   __ bind(above);
  785   if (single)
  786     __ movflt(dst, min ? b : a);
  787   else
  788     __ movdbl(dst, min ? b : a);
  789 
  790   __ jmp(done);
  791 
  792   __ bind(nan);
  793   if (single) {
  794     __ movl(rt, 0x7fc00000); // Float.NaN
  795     __ movdl(dst, rt);
  796   }
  797   else {
  798     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
  799     __ movdq(dst, rt);
  800   }
  801   __ jmp(done);
  802 
  803   __ bind(below);
  804   if (single)
  805     __ movflt(dst, min ? a : b);
  806   else
  807     __ movdbl(dst, min ? a : b);
  808 
  809   __ bind(done);
  810 }
  811 
  812 //=============================================================================
// The constant base node produces no register value: its output mask is empty.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

// The constant table is addressed absolutely here, so the table base offset
// is always 0.
int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

// No post-allocation expansion is requested, so postalloc_expand() must never
// be called.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

// The node emits no code ...
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// ... and therefore occupies zero bytes.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
// Non-product builds only: prints a placeholder line for the empty encoding.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
  837 
  838 
  839 //=============================================================================
  840 #ifndef PRODUCT
  841 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  842   Compile* C = ra_->C;
  843 
  844   int framesize = C->output()->frame_size_in_bytes();
  845   int bangsize = C->output()->bang_size_in_bytes();
  846   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  847   // Remove wordSize for return addr which is already pushed.
  848   framesize -= wordSize;
  849 
  850   if (C->output()->need_stack_bang(bangsize)) {
  851     framesize -= wordSize;
  852     st->print("# stack bang (%d bytes)", bangsize);
  853     st->print("\n\t");
  854     st->print("pushq   rbp\t# Save rbp");
  855     if (PreserveFramePointer) {
  856         st->print("\n\t");
  857         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  858     }
  859     if (framesize) {
  860       st->print("\n\t");
  861       st->print("subq    rsp, #%d\t# Create frame",framesize);
  862     }
  863   } else {
  864     st->print("subq    rsp, #%d\t# Create frame",framesize);
  865     st->print("\n\t");
  866     framesize -= wordSize;
  867     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
  868     if (PreserveFramePointer) {
  869       st->print("\n\t");
  870       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
  871       if (framesize > 0) {
  872         st->print("\n\t");
  873         st->print("addq    rbp, #%d", framesize);
  874       }
  875     }
  876   }
  877 
  878   if (VerifyStackAtCalls) {
  879     st->print("\n\t");
  880     framesize -= wordSize;
  881     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
  882 #ifdef ASSERT
  883     st->print("\n\t");
  884     st->print("# stack alignment check");
  885 #endif
  886   }
  887   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  888     st->print("\n\t");
  889     st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
  890     st->print("\n\t");
  891     st->print("je      fast_entry\t");
  892     st->print("\n\t");
  893     st->print("call    #nmethod_entry_barrier_stub\t");
  894     st->print("\n\tfast_entry:");
  895   }
  896   st->cr();
  897 }
  898 #endif
  899 
  900 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  901   Compile* C = ra_->C;
  902   C2_MacroAssembler _masm(&cbuf);
  903 
  904   if (C->clinit_barrier_on_entry()) {
  905     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  906     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  907 
  908     Label L_skip_barrier;
  909     Register klass = rscratch1;
  910 
  911     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
  912     __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
  913 
  914     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  915 
  916     __ bind(L_skip_barrier);
  917   }
  918 
  919   __ verified_entry(C);
  920   __ bind(*_verified_entry);
  921 
  922   if (C->stub_function() == NULL) {
  923     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  924  #ifdef _LP64
  925     if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  926       // We put the non-hot code of the nmethod entry barrier out-of-line in a stub.
  927       Label dummy_slow_path;
  928       Label dummy_continuation;
  929       Label* slow_path = &dummy_slow_path;
  930       Label* continuation = &dummy_continuation;
  931       if (!Compile::current()->output()->in_scratch_emit_size()) {
  932         // Use real labels from actual stub when not emitting code for the purpose of measuring its size
  933         C2EntryBarrierStub* stub = Compile::current()->output()->entry_barrier_table()->add_entry_barrier();
  934         slow_path = &stub->slow_path();
  935         continuation = &stub->continuation();
  936       }
  937       bs->nmethod_entry_barrier(&_masm, slow_path, continuation);
  938     }
  939 #else
  940     // Don't bother with out-of-line nmethod entry barrier stub for x86_32.
  941     bs->nmethod_entry_barrier(&_masm, NULL /* slow_path */, NULL /* continuation */);
  942 #endif
  943   }
  944 
  945   C->output()->set_frame_complete(cbuf.insts_size());
  946 
  947   if (C->has_mach_constant_base_node()) {
  948     // NOTE: We set the table base offset here because users might be
  949     // emitted before MachConstantBaseNode.
  950     ConstantTable& constant_table = C->output()->constant_table();
  951     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  952   }
  953 }
  954 
  955 int MachPrologNode::reloc() const
  956 {
  957   return 0; // a large enough number
  958 }
  959 
  960 //=============================================================================
  961 #ifndef PRODUCT
  962 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  963 {
  964   Compile* C = ra_->C;
  965   if (generate_vzeroupper(C)) {
  966     st->print("vzeroupper");
  967     st->cr(); st->print("\t");
  968   }
  969 
  970   int framesize = C->output()->frame_size_in_bytes();
  971   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  972   // Remove word for return adr already pushed
  973   // and RBP
  974   framesize -= 2*wordSize;
  975 
  976   if (framesize) {
  977     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
  978     st->print("\t");
  979   }
  980 
  981   st->print_cr("popq    rbp");
  982   if (do_polling() && C->is_method_compilation()) {
  983     st->print("\t");
  984     st->print_cr("cmpq     rsp, poll_offset[r15_thread] \n\t"
  985                  "ja       #safepoint_stub\t"
  986                  "# Safepoint: poll for GC");
  987   }
  988 }
  989 #endif
  990 
  991 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
  992 {
  993   Compile* C = ra_->C;
  994   MacroAssembler _masm(&cbuf);
  995 
  996   if (generate_vzeroupper(C)) {
  997     // Clear upper bits of YMM registers when current compiled code uses
  998     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  999     __ vzeroupper();
 1000   }
 1001 
 1002   // Subtract two words to account for return address and rbp
 1003   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
 1004   __ remove_frame(initial_framesize, C->needs_stack_repair());
 1005 
 1006   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1007     __ reserved_stack_check();
 1008   }
 1009 
 1010   if (do_polling() && C->is_method_compilation()) {
 1011     MacroAssembler _masm(&cbuf);
 1012     Label dummy_label;
 1013     Label* code_stub = &dummy_label;
 1014     if (!C->output()->in_scratch_emit_size()) {
 1015       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1016     }
 1017     __ relocate(relocInfo::poll_return_type);
 1018     __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
 1019   }
 1020 }
 1021 
 1022 int MachEpilogNode::reloc() const
 1023 {
 1024   return 2; // a large enough number
 1025 }
 1026 
 1027 const Pipeline* MachEpilogNode::pipeline() const
 1028 {
 1029   return MachNode::pipeline_class();
 1030 }
 1031 
 1032 //=============================================================================
 1033 
 1034 enum RC {
 1035   rc_bad,
 1036   rc_int,
 1037   rc_kreg,
 1038   rc_float,
 1039   rc_stack
 1040 };
 1041 
 1042 static enum RC rc_class(OptoReg::Name reg)
 1043 {
 1044   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 1045 
 1046   if (OptoReg::is_stack(reg)) return rc_stack;
 1047 
 1048   VMReg r = OptoReg::as_VMReg(reg);
 1049 
 1050   if (r->is_Register()) return rc_int;
 1051 
 1052   if (r->is_KRegister()) return rc_kreg;
 1053 
 1054   assert(r->is_XMMRegister(), "must be");
 1055   return rc_float;
 1056 }
 1057 
 1058 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 1059 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
 1060                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 1061 
 1062 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
 1063                      int stack_offset, int reg, uint ireg, outputStream* st);
 1064 
 1065 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
 1066                                       int dst_offset, uint ireg, outputStream* st) {
 1067   if (cbuf) {
 1068     MacroAssembler _masm(cbuf);
 1069     switch (ireg) {
 1070     case Op_VecS:
 1071       __ movq(Address(rsp, -8), rax);
 1072       __ movl(rax, Address(rsp, src_offset));
 1073       __ movl(Address(rsp, dst_offset), rax);
 1074       __ movq(rax, Address(rsp, -8));
 1075       break;
 1076     case Op_VecD:
 1077       __ pushq(Address(rsp, src_offset));
 1078       __ popq (Address(rsp, dst_offset));
 1079       break;
 1080     case Op_VecX:
 1081       __ pushq(Address(rsp, src_offset));
 1082       __ popq (Address(rsp, dst_offset));
 1083       __ pushq(Address(rsp, src_offset+8));
 1084       __ popq (Address(rsp, dst_offset+8));
 1085       break;
 1086     case Op_VecY:
 1087       __ vmovdqu(Address(rsp, -32), xmm0);
 1088       __ vmovdqu(xmm0, Address(rsp, src_offset));
 1089       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 1090       __ vmovdqu(xmm0, Address(rsp, -32));
 1091       break;
 1092     case Op_VecZ:
 1093       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 1094       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 1095       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 1096       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 1097       break;
 1098     default:
 1099       ShouldNotReachHere();
 1100     }
 1101 #ifndef PRODUCT
 1102   } else {
 1103     switch (ireg) {
 1104     case Op_VecS:
 1105       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 1106                 "movl    rax, [rsp + #%d]\n\t"
 1107                 "movl    [rsp + #%d], rax\n\t"
 1108                 "movq    rax, [rsp - #8]",
 1109                 src_offset, dst_offset);
 1110       break;
 1111     case Op_VecD:
 1112       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1113                 "popq    [rsp + #%d]",
 1114                 src_offset, dst_offset);
 1115       break;
 1116      case Op_VecX:
 1117       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 1118                 "popq    [rsp + #%d]\n\t"
 1119                 "pushq   [rsp + #%d]\n\t"
 1120                 "popq    [rsp + #%d]",
 1121                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 1122       break;
 1123     case Op_VecY:
 1124       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1125                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1126                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1127                 "vmovdqu xmm0, [rsp - #32]",
 1128                 src_offset, dst_offset);
 1129       break;
 1130     case Op_VecZ:
 1131       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1132                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1133                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1134                 "vmovdqu xmm0, [rsp - #64]",
 1135                 src_offset, dst_offset);
 1136       break;
 1137     default:
 1138       ShouldNotReachHere();
 1139     }
 1140 #endif
 1141   }
 1142 }
 1143 
 1144 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
 1145                                        PhaseRegAlloc* ra_,
 1146                                        bool do_size,
 1147                                        outputStream* st) const {
 1148   assert(cbuf != NULL || st  != NULL, "sanity");
 1149   // Get registers to move
 1150   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1151   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1152   OptoReg::Name dst_second = ra_->get_reg_second(this);
 1153   OptoReg::Name dst_first = ra_->get_reg_first(this);
 1154 
 1155   enum RC src_second_rc = rc_class(src_second);
 1156   enum RC src_first_rc = rc_class(src_first);
 1157   enum RC dst_second_rc = rc_class(dst_second);
 1158   enum RC dst_first_rc = rc_class(dst_first);
 1159 
 1160   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 1161          "must move at least 1 register" );
 1162 
 1163   if (src_first == dst_first && src_second == dst_second) {
 1164     // Self copy, no move
 1165     return 0;
 1166   }
 1167   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1168     uint ireg = ideal_reg();
 1169     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1170     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1171     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1172       // mem -> mem
 1173       int src_offset = ra_->reg2offset(src_first);
 1174       int dst_offset = ra_->reg2offset(dst_first);
 1175       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1176     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1177       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1178     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1179       int stack_offset = ra_->reg2offset(dst_first);
 1180       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1181     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 1182       int stack_offset = ra_->reg2offset(src_first);
 1183       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1184     } else {
 1185       ShouldNotReachHere();
 1186     }
 1187     return 0;
 1188   }
 1189   if (src_first_rc == rc_stack) {
 1190     // mem ->
 1191     if (dst_first_rc == rc_stack) {
 1192       // mem -> mem
 1193       assert(src_second != dst_first, "overlap");
 1194       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1195           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1196         // 64-bit
 1197         int src_offset = ra_->reg2offset(src_first);
 1198         int dst_offset = ra_->reg2offset(dst_first);
 1199         if (cbuf) {
 1200           MacroAssembler _masm(cbuf);
 1201           __ pushq(Address(rsp, src_offset));
 1202           __ popq (Address(rsp, dst_offset));
 1203 #ifndef PRODUCT
 1204         } else {
 1205           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1206                     "popq    [rsp + #%d]",
 1207                      src_offset, dst_offset);
 1208 #endif
 1209         }
 1210       } else {
 1211         // 32-bit
 1212         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1213         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1214         // No pushl/popl, so:
 1215         int src_offset = ra_->reg2offset(src_first);
 1216         int dst_offset = ra_->reg2offset(dst_first);
 1217         if (cbuf) {
 1218           MacroAssembler _masm(cbuf);
 1219           __ movq(Address(rsp, -8), rax);
 1220           __ movl(rax, Address(rsp, src_offset));
 1221           __ movl(Address(rsp, dst_offset), rax);
 1222           __ movq(rax, Address(rsp, -8));
 1223 #ifndef PRODUCT
 1224         } else {
 1225           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 1226                     "movl    rax, [rsp + #%d]\n\t"
 1227                     "movl    [rsp + #%d], rax\n\t"
 1228                     "movq    rax, [rsp - #8]",
 1229                      src_offset, dst_offset);
 1230 #endif
 1231         }
 1232       }
 1233       return 0;
 1234     } else if (dst_first_rc == rc_int) {
 1235       // mem -> gpr
 1236       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1237           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1238         // 64-bit
 1239         int offset = ra_->reg2offset(src_first);
 1240         if (cbuf) {
 1241           MacroAssembler _masm(cbuf);
 1242           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1243 #ifndef PRODUCT
 1244         } else {
 1245           st->print("movq    %s, [rsp + #%d]\t# spill",
 1246                      Matcher::regName[dst_first],
 1247                      offset);
 1248 #endif
 1249         }
 1250       } else {
 1251         // 32-bit
 1252         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1253         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1254         int offset = ra_->reg2offset(src_first);
 1255         if (cbuf) {
 1256           MacroAssembler _masm(cbuf);
 1257           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1258 #ifndef PRODUCT
 1259         } else {
 1260           st->print("movl    %s, [rsp + #%d]\t# spill",
 1261                      Matcher::regName[dst_first],
 1262                      offset);
 1263 #endif
 1264         }
 1265       }
 1266       return 0;
 1267     } else if (dst_first_rc == rc_float) {
 1268       // mem-> xmm
 1269       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1270           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1271         // 64-bit
 1272         int offset = ra_->reg2offset(src_first);
 1273         if (cbuf) {
 1274           MacroAssembler _masm(cbuf);
 1275           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1276 #ifndef PRODUCT
 1277         } else {
 1278           st->print("%s  %s, [rsp + #%d]\t# spill",
 1279                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 1280                      Matcher::regName[dst_first],
 1281                      offset);
 1282 #endif
 1283         }
 1284       } else {
 1285         // 32-bit
 1286         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1287         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1288         int offset = ra_->reg2offset(src_first);
 1289         if (cbuf) {
 1290           MacroAssembler _masm(cbuf);
 1291           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1292 #ifndef PRODUCT
 1293         } else {
 1294           st->print("movss   %s, [rsp + #%d]\t# spill",
 1295                      Matcher::regName[dst_first],
 1296                      offset);
 1297 #endif
 1298         }
 1299       }
 1300       return 0;
 1301     } else if (dst_first_rc == rc_kreg) {
 1302       // mem -> kreg
 1303       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1304           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1305         // 64-bit
 1306         int offset = ra_->reg2offset(src_first);
 1307         if (cbuf) {
 1308           MacroAssembler _masm(cbuf);
 1309           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1310 #ifndef PRODUCT
 1311         } else {
 1312           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 1313                      Matcher::regName[dst_first],
 1314                      offset);
 1315 #endif
 1316         }
 1317       }
 1318       return 0;
 1319     }
 1320   } else if (src_first_rc == rc_int) {
 1321     // gpr ->
 1322     if (dst_first_rc == rc_stack) {
 1323       // gpr -> mem
 1324       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1325           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1326         // 64-bit
 1327         int offset = ra_->reg2offset(dst_first);
 1328         if (cbuf) {
 1329           MacroAssembler _masm(cbuf);
 1330           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 1331 #ifndef PRODUCT
 1332         } else {
 1333           st->print("movq    [rsp + #%d], %s\t# spill",
 1334                      offset,
 1335                      Matcher::regName[src_first]);
 1336 #endif
 1337         }
 1338       } else {
 1339         // 32-bit
 1340         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1341         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1342         int offset = ra_->reg2offset(dst_first);
 1343         if (cbuf) {
 1344           MacroAssembler _masm(cbuf);
 1345           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 1346 #ifndef PRODUCT
 1347         } else {
 1348           st->print("movl    [rsp + #%d], %s\t# spill",
 1349                      offset,
 1350                      Matcher::regName[src_first]);
 1351 #endif
 1352         }
 1353       }
 1354       return 0;
 1355     } else if (dst_first_rc == rc_int) {
 1356       // gpr -> gpr
 1357       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1358           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1359         // 64-bit
 1360         if (cbuf) {
 1361           MacroAssembler _masm(cbuf);
 1362           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 1363                   as_Register(Matcher::_regEncode[src_first]));
 1364 #ifndef PRODUCT
 1365         } else {
 1366           st->print("movq    %s, %s\t# spill",
 1367                      Matcher::regName[dst_first],
 1368                      Matcher::regName[src_first]);
 1369 #endif
 1370         }
 1371         return 0;
 1372       } else {
 1373         // 32-bit
 1374         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1375         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1376         if (cbuf) {
 1377           MacroAssembler _masm(cbuf);
 1378           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 1379                   as_Register(Matcher::_regEncode[src_first]));
 1380 #ifndef PRODUCT
 1381         } else {
 1382           st->print("movl    %s, %s\t# spill",
 1383                      Matcher::regName[dst_first],
 1384                      Matcher::regName[src_first]);
 1385 #endif
 1386         }
 1387         return 0;
 1388       }
 1389     } else if (dst_first_rc == rc_float) {
 1390       // gpr -> xmm
 1391       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1392           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1393         // 64-bit
 1394         if (cbuf) {
 1395           MacroAssembler _masm(cbuf);
 1396           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 1397 #ifndef PRODUCT
 1398         } else {
 1399           st->print("movdq   %s, %s\t# spill",
 1400                      Matcher::regName[dst_first],
 1401                      Matcher::regName[src_first]);
 1402 #endif
 1403         }
 1404       } else {
 1405         // 32-bit
 1406         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1407         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1408         if (cbuf) {
 1409           MacroAssembler _masm(cbuf);
 1410           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 1411 #ifndef PRODUCT
 1412         } else {
 1413           st->print("movdl   %s, %s\t# spill",
 1414                      Matcher::regName[dst_first],
 1415                      Matcher::regName[src_first]);
 1416 #endif
 1417         }
 1418       }
 1419       return 0;
 1420     } else if (dst_first_rc == rc_kreg) {
 1421       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1422           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1423         // 64-bit
 1424         if (cbuf) {
 1425           MacroAssembler _masm(cbuf);
 1426           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 1427   #ifndef PRODUCT
 1428         } else {
 1429            st->print("kmovq   %s, %s\t# spill",
 1430                        Matcher::regName[dst_first],
 1431                        Matcher::regName[src_first]);
 1432   #endif
 1433         }
 1434       }
 1435       Unimplemented();
 1436       return 0;
 1437     }
 1438   } else if (src_first_rc == rc_float) {
 1439     // xmm ->
 1440     if (dst_first_rc == rc_stack) {
 1441       // xmm -> mem
 1442       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1443           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1444         // 64-bit
 1445         int offset = ra_->reg2offset(dst_first);
 1446         if (cbuf) {
 1447           MacroAssembler _masm(cbuf);
 1448           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 1449 #ifndef PRODUCT
 1450         } else {
 1451           st->print("movsd   [rsp + #%d], %s\t# spill",
 1452                      offset,
 1453                      Matcher::regName[src_first]);
 1454 #endif
 1455         }
 1456       } else {
 1457         // 32-bit
 1458         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1459         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1460         int offset = ra_->reg2offset(dst_first);
 1461         if (cbuf) {
 1462           MacroAssembler _masm(cbuf);
 1463           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 1464 #ifndef PRODUCT
 1465         } else {
 1466           st->print("movss   [rsp + #%d], %s\t# spill",
 1467                      offset,
 1468                      Matcher::regName[src_first]);
 1469 #endif
 1470         }
 1471       }
 1472       return 0;
 1473     } else if (dst_first_rc == rc_int) {
 1474       // xmm -> gpr
 1475       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1476           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1477         // 64-bit
 1478         if (cbuf) {
 1479           MacroAssembler _masm(cbuf);
 1480           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 1481 #ifndef PRODUCT
 1482         } else {
 1483           st->print("movdq   %s, %s\t# spill",
 1484                      Matcher::regName[dst_first],
 1485                      Matcher::regName[src_first]);
 1486 #endif
 1487         }
 1488       } else {
 1489         // 32-bit
 1490         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1491         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1492         if (cbuf) {
 1493           MacroAssembler _masm(cbuf);
 1494           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 1495 #ifndef PRODUCT
 1496         } else {
 1497           st->print("movdl   %s, %s\t# spill",
 1498                      Matcher::regName[dst_first],
 1499                      Matcher::regName[src_first]);
 1500 #endif
 1501         }
 1502       }
 1503       return 0;
 1504     } else if (dst_first_rc == rc_float) {
 1505       // xmm -> xmm
 1506       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1507           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1508         // 64-bit
 1509         if (cbuf) {
 1510           MacroAssembler _masm(cbuf);
 1511           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 1512 #ifndef PRODUCT
 1513         } else {
 1514           st->print("%s  %s, %s\t# spill",
 1515                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 1516                      Matcher::regName[dst_first],
 1517                      Matcher::regName[src_first]);
 1518 #endif
 1519         }
 1520       } else {
 1521         // 32-bit
 1522         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 1523         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 1524         if (cbuf) {
 1525           MacroAssembler _masm(cbuf);
 1526           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 1527 #ifndef PRODUCT
 1528         } else {
 1529           st->print("%s  %s, %s\t# spill",
 1530                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 1531                      Matcher::regName[dst_first],
 1532                      Matcher::regName[src_first]);
 1533 #endif
 1534         }
 1535       }
 1536       return 0;
 1537     } else if (dst_first_rc == rc_kreg) {
 1538       assert(false, "Illegal spilling");
 1539       return 0;
 1540     }
 1541   } else if (src_first_rc == rc_kreg) {
 1542     if (dst_first_rc == rc_stack) {
 1543       // mem -> kreg
 1544       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1545           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1546         // 64-bit
 1547         int offset = ra_->reg2offset(dst_first);
 1548         if (cbuf) {
 1549           MacroAssembler _masm(cbuf);
 1550           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1551 #ifndef PRODUCT
 1552         } else {
 1553           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 1554                      offset,
 1555                      Matcher::regName[src_first]);
 1556 #endif
 1557         }
 1558       }
 1559       return 0;
 1560     } else if (dst_first_rc == rc_int) {
 1561       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1562           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1563         // 64-bit
 1564         if (cbuf) {
 1565           MacroAssembler _masm(cbuf);
 1566           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1567 #ifndef PRODUCT
 1568         } else {
 1569          st->print("kmovq   %s, %s\t# spill",
 1570                      Matcher::regName[dst_first],
 1571                      Matcher::regName[src_first]);
 1572 #endif
 1573         }
 1574       }
 1575       Unimplemented();
 1576       return 0;
 1577     } else if (dst_first_rc == rc_kreg) {
 1578       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 1579           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 1580         // 64-bit
 1581         if (cbuf) {
 1582           MacroAssembler _masm(cbuf);
 1583           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1584 #ifndef PRODUCT
 1585         } else {
 1586          st->print("kmovq   %s, %s\t# spill",
 1587                      Matcher::regName[dst_first],
 1588                      Matcher::regName[src_first]);
 1589 #endif
 1590         }
 1591       }
 1592       return 0;
 1593     } else if (dst_first_rc == rc_float) {
 1594       assert(false, "Illegal spill");
 1595       return 0;
 1596     }
 1597   }
 1598 
 1599   assert(0," foo ");
 1600   Unimplemented();
 1601   return 0;
 1602 }
 1603 
 1604 #ifndef PRODUCT
 1605 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1606   implementation(NULL, ra_, false, st);
 1607 }
 1608 #endif
 1609 
 1610 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1611   implementation(&cbuf, ra_, false, NULL);
 1612 }
 1613 
 1614 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1615   return MachNode::size(ra_);
 1616 }
 1617 
 1618 //=============================================================================
 1619 #ifndef PRODUCT
 1620 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1621 {
 1622   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1623   int reg = ra_->get_reg_first(this);
 1624   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 1625             Matcher::regName[reg], offset);
 1626 }
 1627 #endif
 1628 
 1629 void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1630 {
 1631   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1632   int reg = ra_->get_encode(this);
 1633   if (offset >= 0x80) {
 1634     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1635     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1636     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1637     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1638     emit_d32(cbuf, offset);
 1639   } else {
 1640     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1641     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1642     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1643     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1644     emit_d8(cbuf, offset);
 1645   }
 1646 }
 1647 
 1648 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1649 {
 1650   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1651   return (offset < 0x80) ? 5 : 8; // REX
 1652 }
 1653 
 1654 //=============================================================================
 1655 #ifndef PRODUCT
 1656 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1657 {
 1658   st->print_cr("MachVEPNode");
 1659 }
 1660 #endif
 1661 
 1662 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1663 {
 1664   C2_MacroAssembler _masm(&cbuf);
 1665   if (!_verified) {
 1666     uint insts_size = cbuf.insts_size();
 1667     if (UseCompressedClassPointers) {
 1668       __ load_klass(rscratch1, j_rarg0, rscratch2);
 1669       __ cmpptr(rax, rscratch1);
 1670     } else {
 1671       __ cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1672     }
 1673     __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1674   } else {
 1675     // Unpack inline type args passed as oop and then jump to
 1676     // the verified entry point (skipping the unverified entry).
 1677     int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
 1678     // Emit code for verified entry and save increment for stack repair on return
 1679     __ verified_entry(ra_->C, sp_inc);
 1680     __ jmp(*_verified_entry);
 1681   }
 1682 }
 1683 
 1684 //=============================================================================
 1685 #ifndef PRODUCT
 1686 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1687 {
 1688   if (UseCompressedClassPointers) {
 1689     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1690     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1691     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1692   } else {
 1693     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1694                  "# Inline cache check");
 1695   }
 1696   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1697   st->print_cr("\tnop\t# nops to align entry point");
 1698 }
 1699 #endif
 1700 
 1701 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1702 {
 1703   MacroAssembler masm(&cbuf);
 1704   uint insts_size = cbuf.insts_size();
 1705   if (UseCompressedClassPointers) {
 1706     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1707     masm.cmpptr(rax, rscratch1);
 1708   } else {
 1709     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1710   }
 1711 
 1712   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1713 
 1714   /* WARNING these NOPs are critical so that verified entry point is properly
 1715      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1716   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1717   if (OptoBreakpoint) {
 1718     // Leave space for int3
 1719     nops_cnt -= 1;
 1720   }
 1721   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1722   if (nops_cnt > 0)
 1723     masm.nop(nops_cnt);
 1724 }
 1725 
 1726 //=============================================================================
 1727 
 1728 const bool Matcher::supports_vector_calling_convention(void) {
 1729   if (EnableVectorSupport && UseVectorStubs) {
 1730     return true;
 1731   }
 1732   return false;
 1733 }
 1734 
 1735 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1736   assert(EnableVectorSupport && UseVectorStubs, "sanity");
 1737   int lo = XMM0_num;
 1738   int hi = XMM0b_num;
 1739   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 1740   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 1741   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 1742   return OptoRegPair(hi, lo);
 1743 }
 1744 
 1745 // Is this branch offset short enough that a short branch can be used?
 1746 //
 1747 // NOTE: If the platform does not provide any short branch variants, then
 1748 //       this method should return false for offset 0.
 1749 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1750   // The passed offset is relative to address of the branch.
 1751   // On 86 a branch displacement is calculated relative to address
 1752   // of a next instruction.
 1753   offset -= br_size;
 1754 
 1755   // the short version of jmpConUCF2 contains multiple branches,
 1756   // making the reach slightly less
 1757   if (rule == jmpConUCF2_rule)
 1758     return (-126 <= offset && offset <= 125);
 1759   return (-128 <= offset && offset <= 127);
 1760 }
 1761 
 1762 // Return whether or not this register is ever used as an argument.
 1763 // This function is used on startup to build the trampoline stubs in
 1764 // generateOptoStub.  Registers not mentioned will be killed by the VM
 1765 // call in the trampoline, and arguments in those registers not be
 1766 // available to the callee.
 1767 bool Matcher::can_be_java_arg(int reg)
 1768 {
 1769   return
 1770     reg ==  RDI_num || reg == RDI_H_num ||
 1771     reg ==  RSI_num || reg == RSI_H_num ||
 1772     reg ==  RDX_num || reg == RDX_H_num ||
 1773     reg ==  RCX_num || reg == RCX_H_num ||
 1774     reg ==   R8_num || reg ==  R8_H_num ||
 1775     reg ==   R9_num || reg ==  R9_H_num ||
 1776     reg ==  R12_num || reg == R12_H_num ||
 1777     reg == XMM0_num || reg == XMM0b_num ||
 1778     reg == XMM1_num || reg == XMM1b_num ||
 1779     reg == XMM2_num || reg == XMM2b_num ||
 1780     reg == XMM3_num || reg == XMM3b_num ||
 1781     reg == XMM4_num || reg == XMM4b_num ||
 1782     reg == XMM5_num || reg == XMM5b_num ||
 1783     reg == XMM6_num || reg == XMM6b_num ||
 1784     reg == XMM7_num || reg == XMM7b_num;
 1785 }
 1786 
 1787 bool Matcher::is_spillable_arg(int reg)
 1788 {
 1789   return can_be_java_arg(reg);
 1790 }
 1791 
 1792 uint Matcher::int_pressure_limit()
 1793 {
 1794   return (INTPRESSURE == -1) ? _INT_REG_mask.Size() : INTPRESSURE;
 1795 }
 1796 
 1797 uint Matcher::float_pressure_limit()
 1798 {
 1799   // After experiment around with different values, the following default threshold
 1800   // works best for LCM's register pressure scheduling on x64.
 1801   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 1802   uint default_float_pressure_threshold = _FLOAT_REG_mask.Size() - dec_count;
 1803   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 1804 }
 1805 
 1806 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1807   // In 64 bit mode a code which use multiply when
 1808   // devisor is constant is faster than hardware
 1809   // DIV instruction (it uses MulHiL).
 1810   return false;
 1811 }
 1812 
 1813 // Register for DIVI projection of divmodI
 1814 RegMask Matcher::divI_proj_mask() {
 1815   return INT_RAX_REG_mask();
 1816 }
 1817 
 1818 // Register for MODI projection of divmodI
 1819 RegMask Matcher::modI_proj_mask() {
 1820   return INT_RDX_REG_mask();
 1821 }
 1822 
 1823 // Register for DIVL projection of divmodL
 1824 RegMask Matcher::divL_proj_mask() {
 1825   return LONG_RAX_REG_mask();
 1826 }
 1827 
 1828 // Register for MODL projection of divmodL
 1829 RegMask Matcher::modL_proj_mask() {
 1830   return LONG_RDX_REG_mask();
 1831 }
 1832 
 1833 // Register for saving SP into on method handle invokes. Not used on x86_64.
 1834 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1835     return NO_REG_mask();
 1836 }
 1837 
 1838 %}
 1839 
 1840 //----------ENCODING BLOCK-----------------------------------------------------
 1841 // This block specifies the encoding classes used by the compiler to
 1842 // output byte streams.  Encoding classes are parameterized macros
 1843 // used by Machine Instruction Nodes in order to generate the bit
 1844 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
 1848 // which returns its register number when queried.  CONST_INTER causes
 1849 // an operand to generate a function which returns the value of the
 1850 // constant when queried.  MEMORY_INTER causes an operand to generate
 1851 // four functions which return the Base Register, the Index Register,
 1852 // the Scale Value, and the Offset Value of the operand when queried.
 1853 // COND_INTER causes an operand to generate six functions which return
 1854 // the encoding code (ie - encoding bits for the instruction)
 1855 // associated with each basic boolean condition for a conditional
 1856 // instruction.
 1857 //
 1858 // Instructions specify two basic values for encoding.  Again, a
 1859 // function is available to check if the constant displacement is an
 1860 // oop. They use the ins_encode keyword to specify their encoding
 1861 // classes (which must be a sequence of enc_class names, and their
 1862 // parameters, specified in the encoding block), and they use the
 1863 // opcode keyword to specify, in order, their primary, secondary, and
 1864 // tertiary opcode.  Only the opcode sections which a particular
 1865 // instruction needs for encoding need to be specified.
 1866 encode %{
 1867   // Build emit functions for each basic byte or larger field in the
 1868   // intel encoding scheme (opcode, rm, sib, immediate), and call them
 1869   // from C++ code in the enc_class source block.  Emit functions will
 1870   // live in the main source block for now.  In future, we can
 1871   // generalize this by adding a syntax that specifies the sizes of
 1872   // fields in an order, so that the adlc can build the emit functions
 1873   // automagically
 1874 
 1875   // Emit primary opcode
 1876   enc_class OpcP
 1877   %{
 1878     emit_opcode(cbuf, $primary);
 1879   %}
 1880 
 1881   // Emit secondary opcode
 1882   enc_class OpcS
 1883   %{
 1884     emit_opcode(cbuf, $secondary);
 1885   %}
 1886 
 1887   // Emit tertiary opcode
 1888   enc_class OpcT
 1889   %{
 1890     emit_opcode(cbuf, $tertiary);
 1891   %}
 1892 
 1893   // Emit opcode directly
 1894   enc_class Opcode(immI d8)
 1895   %{
 1896     emit_opcode(cbuf, $d8$$constant);
 1897   %}
 1898 
 1899   // Emit size prefix
 1900   enc_class SizePrefix
 1901   %{
 1902     emit_opcode(cbuf, 0x66);
 1903   %}
 1904 
 1905   enc_class reg(rRegI reg)
 1906   %{
 1907     emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
 1908   %}
 1909 
 1910   enc_class reg_reg(rRegI dst, rRegI src)
 1911   %{
 1912     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
 1913   %}
 1914 
 1915   enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
 1916   %{
 1917     emit_opcode(cbuf, $opcode$$constant);
 1918     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
 1919   %}
 1920 
 1921   enc_class cdql_enc(no_rax_rdx_RegI div)
 1922   %{
 1923     // Full implementation of Java idiv and irem; checks for
 1924     // special case as described in JVM spec., p.243 & p.271.
 1925     //
 1926     //         normal case                           special case
 1927     //
 1928     // input : rax: dividend                         min_int
 1929     //         reg: divisor                          -1
 1930     //
 1931     // output: rax: quotient  (= rax idiv reg)       min_int
 1932     //         rdx: remainder (= rax irem reg)       0
 1933     //
 1934     //  Code sequnce:
 1935     //
 1936     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 1937     //    5:   75 07/08                jne    e <normal>
 1938     //    7:   33 d2                   xor    %edx,%edx
 1939     //  [div >= 8 -> offset + 1]
 1940     //  [REX_B]
 1941     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 1942     //    c:   74 03/04                je     11 <done>
 1943     // 000000000000000e <normal>:
 1944     //    e:   99                      cltd
 1945     //  [div >= 8 -> offset + 1]
 1946     //  [REX_B]
 1947     //    f:   f7 f9                   idiv   $div
 1948     // 0000000000000011 <done>:
 1949     MacroAssembler _masm(&cbuf);
 1950     Label normal;
 1951     Label done;
 1952 
 1953     // cmp    $0x80000000,%eax
 1954     __ cmpl(as_Register(RAX_enc), 0x80000000);
 1955 
 1956     // jne    e <normal>
 1957     __ jccb(Assembler::notEqual, normal);
 1958 
 1959     // xor    %edx,%edx
 1960     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 1961 
 1962     // cmp    $0xffffffffffffffff,%ecx
 1963     __ cmpl($div$$Register, -1);
 1964 
 1965     // je     11 <done>
 1966     __ jccb(Assembler::equal, done);
 1967 
 1968     // <normal>
 1969     // cltd
 1970     __ bind(normal);
 1971     __ cdql();
 1972 
 1973     // idivl
 1974     // <done>
 1975     __ idivl($div$$Register);
 1976     __ bind(done);
 1977   %}
 1978 
 1979   enc_class cdqq_enc(no_rax_rdx_RegL div)
 1980   %{
 1981     // Full implementation of Java ldiv and lrem; checks for
 1982     // special case as described in JVM spec., p.243 & p.271.
 1983     //
 1984     //         normal case                           special case
 1985     //
 1986     // input : rax: dividend                         min_long
 1987     //         reg: divisor                          -1
 1988     //
 1989     // output: rax: quotient  (= rax idiv reg)       min_long
 1990     //         rdx: remainder (= rax irem reg)       0
 1991     //
 1992     //  Code sequnce:
 1993     //
 1994     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 1995     //    7:   00 00 80
 1996     //    a:   48 39 d0                cmp    %rdx,%rax
 1997     //    d:   75 08                   jne    17 <normal>
 1998     //    f:   33 d2                   xor    %edx,%edx
 1999     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 2000     //   15:   74 05                   je     1c <done>
 2001     // 0000000000000017 <normal>:
 2002     //   17:   48 99                   cqto
 2003     //   19:   48 f7 f9                idiv   $div
 2004     // 000000000000001c <done>:
 2005     MacroAssembler _masm(&cbuf);
 2006     Label normal;
 2007     Label done;
 2008 
 2009     // mov    $0x8000000000000000,%rdx
 2010     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 2011 
 2012     // cmp    %rdx,%rax
 2013     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 2014 
 2015     // jne    17 <normal>
 2016     __ jccb(Assembler::notEqual, normal);
 2017 
 2018     // xor    %edx,%edx
 2019     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 2020 
 2021     // cmp    $0xffffffffffffffff,$div
 2022     __ cmpq($div$$Register, -1);
 2023 
 2024     // je     1e <done>
 2025     __ jccb(Assembler::equal, done);
 2026 
 2027     // <normal>
 2028     // cqto
 2029     __ bind(normal);
 2030     __ cdqq();
 2031 
 2032     // idivq (note: must be emitted by the user of this rule)
 2033     // <done>
 2034     __ idivq($div$$Register);
 2035     __ bind(done);
 2036   %}
 2037 
 2038   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
 2039   enc_class OpcSE(immI imm)
 2040   %{
 2041     // Emit primary opcode and set sign-extend bit
 2042     // Check for 8-bit immediate, and set sign extend bit in opcode
 2043     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2044       emit_opcode(cbuf, $primary | 0x02);
 2045     } else {
 2046       // 32-bit immediate
 2047       emit_opcode(cbuf, $primary);
 2048     }
 2049   %}
 2050 
 2051   enc_class OpcSErm(rRegI dst, immI imm)
 2052   %{
 2053     // OpcSEr/m
 2054     int dstenc = $dst$$reg;
 2055     if (dstenc >= 8) {
 2056       emit_opcode(cbuf, Assembler::REX_B);
 2057       dstenc -= 8;
 2058     }
 2059     // Emit primary opcode and set sign-extend bit
 2060     // Check for 8-bit immediate, and set sign extend bit in opcode
 2061     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2062       emit_opcode(cbuf, $primary | 0x02);
 2063     } else {
 2064       // 32-bit immediate
 2065       emit_opcode(cbuf, $primary);
 2066     }
 2067     // Emit r/m byte with secondary opcode, after primary opcode.
 2068     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2069   %}
 2070 
 2071   enc_class OpcSErm_wide(rRegL dst, immI imm)
 2072   %{
 2073     // OpcSEr/m
 2074     int dstenc = $dst$$reg;
 2075     if (dstenc < 8) {
 2076       emit_opcode(cbuf, Assembler::REX_W);
 2077     } else {
 2078       emit_opcode(cbuf, Assembler::REX_WB);
 2079       dstenc -= 8;
 2080     }
 2081     // Emit primary opcode and set sign-extend bit
 2082     // Check for 8-bit immediate, and set sign extend bit in opcode
 2083     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2084       emit_opcode(cbuf, $primary | 0x02);
 2085     } else {
 2086       // 32-bit immediate
 2087       emit_opcode(cbuf, $primary);
 2088     }
 2089     // Emit r/m byte with secondary opcode, after primary opcode.
 2090     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2091   %}
 2092 
 2093   enc_class Con8or32(immI imm)
 2094   %{
 2095     // Check for 8-bit immediate, and set sign extend bit in opcode
 2096     if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
 2097       $$$emit8$imm$$constant;
 2098     } else {
 2099       // 32-bit immediate
 2100       $$$emit32$imm$$constant;
 2101     }
 2102   %}
 2103 
 2104   enc_class opc2_reg(rRegI dst)
 2105   %{
 2106     // BSWAP
 2107     emit_cc(cbuf, $secondary, $dst$$reg);
 2108   %}
 2109 
 2110   enc_class opc3_reg(rRegI dst)
 2111   %{
 2112     // BSWAP
 2113     emit_cc(cbuf, $tertiary, $dst$$reg);
 2114   %}
 2115 
 2116   enc_class reg_opc(rRegI div)
 2117   %{
 2118     // INC, DEC, IDIV, IMOD, JMP indirect, ...
 2119     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
 2120   %}
 2121 
 2122   enc_class enc_cmov(cmpOp cop)
 2123   %{
 2124     // CMOV
 2125     $$$emit8$primary;
 2126     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 2127   %}
 2128 
 2129   enc_class enc_PartialSubtypeCheck()
 2130   %{
 2131     Register Rrdi = as_Register(RDI_enc); // result register
 2132     Register Rrax = as_Register(RAX_enc); // super class
 2133     Register Rrcx = as_Register(RCX_enc); // killed
 2134     Register Rrsi = as_Register(RSI_enc); // sub class
 2135     Label miss;
 2136     const bool set_cond_codes = true;
 2137 
 2138     MacroAssembler _masm(&cbuf);
 2139     __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
 2140                                      NULL, &miss,
 2141                                      /*set_cond_codes:*/ true);
 2142     if ($primary) {
 2143       __ xorptr(Rrdi, Rrdi);
 2144     }
 2145     __ bind(miss);
 2146   %}
 2147 
 2148   enc_class clear_avx %{
 2149     debug_only(int off0 = cbuf.insts_size());
 2150     if (generate_vzeroupper(Compile::current())) {
 2151       // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
 2152       // Clear upper bits of YMM registers when current compiled code uses
 2153       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 2154       MacroAssembler _masm(&cbuf);
 2155       __ vzeroupper();
 2156     }
 2157     debug_only(int off1 = cbuf.insts_size());
 2158     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 2159   %}
 2160 
 2161   enc_class Java_To_Runtime(method meth) %{
 2162     // No relocation needed
 2163     MacroAssembler _masm(&cbuf);
 2164     __ mov64(r10, (int64_t) $meth$$method);
 2165     __ call(r10);
 2166     __ post_call_nop();
 2167   %}
 2168 
 2169   enc_class Java_Static_Call(method meth)
 2170   %{
 2171     // JAVA STATIC CALL
 2172     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 2173     // determine who we intended to call.
 2174     MacroAssembler _masm(&cbuf);
 2175     cbuf.set_insts_mark();
 2176     $$$emit8$primary;
 2177 
 2178     if (!_method) {
 2179       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
 2180                      runtime_call_Relocation::spec(),
 2181                      RELOC_DISP32);
 2182     } else {
 2183       int method_index = resolved_method_index(cbuf);
 2184       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 2185                                                   : static_call_Relocation::spec(method_index);
 2186       emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
 2187                      rspec, RELOC_DISP32);
 2188       address mark = cbuf.insts_mark();
 2189       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 2190         // Calls of the same statically bound method can share
 2191         // a stub to the interpreter.
 2192         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 2193       } else {
 2194         // Emit stubs for static call.
 2195         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 2196         if (stub == NULL) {
 2197           ciEnv::current()->record_failure("CodeCache is full");
 2198           return;
 2199         }
 2200       }
 2201     }
 2202     _masm.clear_inst_mark();
 2203     __ post_call_nop();
 2204   %}
 2205 
 2206   enc_class Java_Dynamic_Call(method meth) %{
 2207     MacroAssembler _masm(&cbuf);
 2208     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 2209     __ post_call_nop();
 2210   %}
 2211 
 2212   enc_class reg_opc_imm(rRegI dst, immI8 shift)
 2213   %{
 2214     // SAL, SAR, SHR
 2215     int dstenc = $dst$$reg;
 2216     if (dstenc >= 8) {
 2217       emit_opcode(cbuf, Assembler::REX_B);
 2218       dstenc -= 8;
 2219     }
 2220     $$$emit8$primary;
 2221     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2222     $$$emit8$shift$$constant;
 2223   %}
 2224 
 2225   enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
 2226   %{
 2227     // SAL, SAR, SHR
 2228     int dstenc = $dst$$reg;
 2229     if (dstenc < 8) {
 2230       emit_opcode(cbuf, Assembler::REX_W);
 2231     } else {
 2232       emit_opcode(cbuf, Assembler::REX_WB);
 2233       dstenc -= 8;
 2234     }
 2235     $$$emit8$primary;
 2236     emit_rm(cbuf, 0x3, $secondary, dstenc);
 2237     $$$emit8$shift$$constant;
 2238   %}
 2239 
 2240   enc_class load_immI(rRegI dst, immI src)
 2241   %{
 2242     int dstenc = $dst$$reg;
 2243     if (dstenc >= 8) {
 2244       emit_opcode(cbuf, Assembler::REX_B);
 2245       dstenc -= 8;
 2246     }
 2247     emit_opcode(cbuf, 0xB8 | dstenc);
 2248     $$$emit32$src$$constant;
 2249   %}
 2250 
 2251   enc_class load_immL(rRegL dst, immL src)
 2252   %{
 2253     int dstenc = $dst$$reg;
 2254     if (dstenc < 8) {
 2255       emit_opcode(cbuf, Assembler::REX_W);
 2256     } else {
 2257       emit_opcode(cbuf, Assembler::REX_WB);
 2258       dstenc -= 8;
 2259     }
 2260     emit_opcode(cbuf, 0xB8 | dstenc);
 2261     emit_d64(cbuf, $src$$constant);
 2262   %}
 2263 
 2264   enc_class load_immUL32(rRegL dst, immUL32 src)
 2265   %{
 2266     // same as load_immI, but this time we care about zeroes in the high word
 2267     int dstenc = $dst$$reg;
 2268     if (dstenc >= 8) {
 2269       emit_opcode(cbuf, Assembler::REX_B);
 2270       dstenc -= 8;
 2271     }
 2272     emit_opcode(cbuf, 0xB8 | dstenc);
 2273     $$$emit32$src$$constant;
 2274   %}
 2275 
 2276   enc_class load_immL32(rRegL dst, immL32 src)
 2277   %{
 2278     int dstenc = $dst$$reg;
 2279     if (dstenc < 8) {
 2280       emit_opcode(cbuf, Assembler::REX_W);
 2281     } else {
 2282       emit_opcode(cbuf, Assembler::REX_WB);
 2283       dstenc -= 8;
 2284     }
 2285     emit_opcode(cbuf, 0xC7);
 2286     emit_rm(cbuf, 0x03, 0x00, dstenc);
 2287     $$$emit32$src$$constant;
 2288   %}
 2289 
 2290   enc_class load_immP31(rRegP dst, immP32 src)
 2291   %{
 2292     // same as load_immI, but this time we care about zeroes in the high word
 2293     int dstenc = $dst$$reg;
 2294     if (dstenc >= 8) {
 2295       emit_opcode(cbuf, Assembler::REX_B);
 2296       dstenc -= 8;
 2297     }
 2298     emit_opcode(cbuf, 0xB8 | dstenc);
 2299     $$$emit32$src$$constant;
 2300   %}
 2301 
 2302   enc_class load_immP(rRegP dst, immP src)
 2303   %{
 2304     int dstenc = $dst$$reg;
 2305     if (dstenc < 8) {
 2306       emit_opcode(cbuf, Assembler::REX_W);
 2307     } else {
 2308       emit_opcode(cbuf, Assembler::REX_WB);
 2309       dstenc -= 8;
 2310     }
 2311     emit_opcode(cbuf, 0xB8 | dstenc);
 2312     // This next line should be generated from ADLC
 2313     if ($src->constant_reloc() != relocInfo::none) {
 2314       emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
 2315     } else {
 2316       emit_d64(cbuf, $src$$constant);
 2317     }
 2318   %}
 2319 
 2320   enc_class Con32(immI src)
 2321   %{
 2322     // Output immediate
 2323     $$$emit32$src$$constant;
 2324   %}
 2325 
 2326   enc_class Con32F_as_bits(immF src)
 2327   %{
 2328     // Output Float immediate bits
 2329     jfloat jf = $src$$constant;
 2330     jint jf_as_bits = jint_cast(jf);
 2331     emit_d32(cbuf, jf_as_bits);
 2332   %}
 2333 
 2334   enc_class Con16(immI src)
 2335   %{
 2336     // Output immediate
 2337     $$$emit16$src$$constant;
 2338   %}
 2339 
 2340   // How is this different from Con32??? XXX
 2341   enc_class Con_d32(immI src)
 2342   %{
 2343     emit_d32(cbuf,$src$$constant);
 2344   %}
 2345 
 2346   enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
 2347     // Output immediate memory reference
 2348     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2349     emit_d32(cbuf, 0x00);
 2350   %}
 2351 
 2352   enc_class lock_prefix()
 2353   %{
 2354     emit_opcode(cbuf, 0xF0); // lock
 2355   %}
 2356 
 2357   enc_class REX_mem(memory mem)
 2358   %{
 2359     if ($mem$$base >= 8) {
 2360       if ($mem$$index < 8) {
 2361         emit_opcode(cbuf, Assembler::REX_B);
 2362       } else {
 2363         emit_opcode(cbuf, Assembler::REX_XB);
 2364       }
 2365     } else {
 2366       if ($mem$$index >= 8) {
 2367         emit_opcode(cbuf, Assembler::REX_X);
 2368       }
 2369     }
 2370   %}
 2371 
 2372   enc_class REX_mem_wide(memory mem)
 2373   %{
 2374     if ($mem$$base >= 8) {
 2375       if ($mem$$index < 8) {
 2376         emit_opcode(cbuf, Assembler::REX_WB);
 2377       } else {
 2378         emit_opcode(cbuf, Assembler::REX_WXB);
 2379       }
 2380     } else {
 2381       if ($mem$$index < 8) {
 2382         emit_opcode(cbuf, Assembler::REX_W);
 2383       } else {
 2384         emit_opcode(cbuf, Assembler::REX_WX);
 2385       }
 2386     }
 2387   %}
 2388 
 2389   // for byte regs
 2390   enc_class REX_breg(rRegI reg)
 2391   %{
 2392     if ($reg$$reg >= 4) {
 2393       emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2394     }
 2395   %}
 2396 
 2397   // for byte regs
 2398   enc_class REX_reg_breg(rRegI dst, rRegI src)
 2399   %{
 2400     if ($dst$$reg < 8) {
 2401       if ($src$$reg >= 4) {
 2402         emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
 2403       }
 2404     } else {
 2405       if ($src$$reg < 8) {
 2406         emit_opcode(cbuf, Assembler::REX_R);
 2407       } else {
 2408         emit_opcode(cbuf, Assembler::REX_RB);
 2409       }
 2410     }
 2411   %}
 2412 
 2413   // for byte regs
 2414   enc_class REX_breg_mem(rRegI reg, memory mem)
 2415   %{
 2416     if ($reg$$reg < 8) {
 2417       if ($mem$$base < 8) {
 2418         if ($mem$$index >= 8) {
 2419           emit_opcode(cbuf, Assembler::REX_X);
 2420         } else if ($reg$$reg >= 4) {
 2421           emit_opcode(cbuf, Assembler::REX);
 2422         }
 2423       } else {
 2424         if ($mem$$index < 8) {
 2425           emit_opcode(cbuf, Assembler::REX_B);
 2426         } else {
 2427           emit_opcode(cbuf, Assembler::REX_XB);
 2428         }
 2429       }
 2430     } else {
 2431       if ($mem$$base < 8) {
 2432         if ($mem$$index < 8) {
 2433           emit_opcode(cbuf, Assembler::REX_R);
 2434         } else {
 2435           emit_opcode(cbuf, Assembler::REX_RX);
 2436         }
 2437       } else {
 2438         if ($mem$$index < 8) {
 2439           emit_opcode(cbuf, Assembler::REX_RB);
 2440         } else {
 2441           emit_opcode(cbuf, Assembler::REX_RXB);
 2442         }
 2443       }
 2444     }
 2445   %}
 2446 
 2447   enc_class REX_reg(rRegI reg)
 2448   %{
 2449     if ($reg$$reg >= 8) {
 2450       emit_opcode(cbuf, Assembler::REX_B);
 2451     }
 2452   %}
 2453 
 2454   enc_class REX_reg_wide(rRegI reg)
 2455   %{
 2456     if ($reg$$reg < 8) {
 2457       emit_opcode(cbuf, Assembler::REX_W);
 2458     } else {
 2459       emit_opcode(cbuf, Assembler::REX_WB);
 2460     }
 2461   %}
 2462 
 2463   enc_class REX_reg_reg(rRegI dst, rRegI src)
 2464   %{
 2465     if ($dst$$reg < 8) {
 2466       if ($src$$reg >= 8) {
 2467         emit_opcode(cbuf, Assembler::REX_B);
 2468       }
 2469     } else {
 2470       if ($src$$reg < 8) {
 2471         emit_opcode(cbuf, Assembler::REX_R);
 2472       } else {
 2473         emit_opcode(cbuf, Assembler::REX_RB);
 2474       }
 2475     }
 2476   %}
 2477 
 2478   enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
 2479   %{
 2480     if ($dst$$reg < 8) {
 2481       if ($src$$reg < 8) {
 2482         emit_opcode(cbuf, Assembler::REX_W);
 2483       } else {
 2484         emit_opcode(cbuf, Assembler::REX_WB);
 2485       }
 2486     } else {
 2487       if ($src$$reg < 8) {
 2488         emit_opcode(cbuf, Assembler::REX_WR);
 2489       } else {
 2490         emit_opcode(cbuf, Assembler::REX_WRB);
 2491       }
 2492     }
 2493   %}
 2494 
 2495   enc_class REX_reg_mem(rRegI reg, memory mem)
        %{
          // Three independent conditions pick the prefix: a high register
          // (encoding >= 8) contributes R, a high index contributes X and a high
          // base contributes B.  When none applies no prefix byte is emitted.
          const bool reg_high   = $reg$$reg >= 8;
          const bool base_high  = $mem$$base >= 8;
          const bool index_high = $mem$$index >= 8;
          if (reg_high) {
            emit_opcode(cbuf, base_high
                                ? (index_high ? Assembler::REX_RXB : Assembler::REX_RB)
                                : (index_high ? Assembler::REX_RX  : Assembler::REX_R));
          } else if (base_high) {
            emit_opcode(cbuf, index_high ? Assembler::REX_XB : Assembler::REX_B);
          } else if (index_high) {
            emit_opcode(cbuf, Assembler::REX_X);
          }
        %}
 2525 
 2526   enc_class REX_reg_mem_wide(rRegL reg, memory mem)
        %{
          // Wide variant: one prefix byte is always emitted.  On top of W, a high
          // register (encoding >= 8) adds R, a high index adds X and a high base
          // adds B.
          const bool reg_high   = $reg$$reg >= 8;
          const bool base_high  = $mem$$base >= 8;
          const bool index_high = $mem$$index >= 8;
          if (reg_high) {
            emit_opcode(cbuf, base_high
                                ? (index_high ? Assembler::REX_WRXB : Assembler::REX_WRB)
                                : (index_high ? Assembler::REX_WRX  : Assembler::REX_WR));
          } else {
            emit_opcode(cbuf, base_high
                                ? (index_high ? Assembler::REX_WXB : Assembler::REX_WB)
                                : (index_high ? Assembler::REX_WX  : Assembler::REX_W));
          }
        %}
 2558 
 2559   enc_class reg_mem(rRegI ereg, memory mem)
        %{
          // Hands the register encoding and every field of the memory operand
          // (base, index, scale, displacement and its relocation type) to
          // encode_RegMem.  High registers are dealt with inside encode_RegMem.
          encode_RegMem(cbuf, $ereg$$reg,
                        $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                        $mem->disp_reloc());
        %}
 2571 
 2572   enc_class RM_opc_mem(immI rm_opcode, memory mem)
        %{
          // Same as a register/memory encoding, except that the constant
          // rm_opcode is passed to encode_RegMem in place of a register number.
          // High registers are dealt with inside encode_RegMem.
          const int opcode_ext = $rm_opcode$$constant;

          // The displacement relocation is disp-as-oop when working with
          // static globals.
          const relocInfo::relocType reloc = $mem->disp_reloc();

          encode_RegMem(cbuf, opcode_ext,
                        $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                        reloc);
        %}
 2588 
 2589   enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
        %{
          // Encodes dst against the address [src0 + src1]: src0 is the base
          // register (0xFFFFFFFF would indicate no base) and src1 the constant
          // displacement (0x00 indicates no displacement).
          const int no_index = 0x04;          // 0x04 indicates no index
          const int no_scale = 0x00;          // 0x00 indicates no scale
          const int disp     = $src1$$constant;
          encode_RegMem(cbuf, $dst$$reg, $src0$$reg, no_index, no_scale, disp,
                        relocInfo::none);
        %}
 2600 
 2601   enc_class neg_reg(rRegI dst)
        %{
          // NEG on a register: optional REX_B prefix for encodings 8 and above,
          // then opcode byte 0xF7 and an emit_rm byte built from 0x3 / 0x03 /
          // the register number reduced by 8 when it is a high register.
          const int  enc      = $dst$$reg;
          const bool high_reg = enc >= 8;
          if (high_reg) {
            emit_opcode(cbuf, Assembler::REX_B);
          }
          emit_opcode(cbuf, 0xF7);
          emit_rm(cbuf, 0x3, 0x03, high_reg ? enc - 8 : enc);
        %}
 2612 
 2613   enc_class neg_reg_wide(rRegI dst)
        %{
          // Wide NEG on a register: a prefix byte is always emitted (REX_W, or
          // REX_WB for encodings 8 and above), followed by opcode byte 0xF7 and
          // an emit_rm byte built from 0x3 / 0x03 / the register number reduced
          // by 8 when it is a high register.
          const int  enc      = $dst$$reg;
          const bool high_reg = enc >= 8;
          emit_opcode(cbuf, high_reg ? Assembler::REX_WB : Assembler::REX_W);
          emit_opcode(cbuf, 0xF7);
          emit_rm(cbuf, 0x3, 0x03, high_reg ? enc - 8 : enc);
        %}
 2626 
 2627   enc_class setLT_reg(rRegI dst)
        %{
          // SETLT on a register.  Encodings 8 and above get REX_B, encodings 4-7
          // get a plain REX prefix, encodings 0-3 get no prefix.
          // NOTE(review): the plain REX for 4-7 presumably selects the low-byte
          // form of those registers -- confirm against the Intel SDM.
          const int enc = $dst$$reg;
          if (enc >= 4) {
            emit_opcode(cbuf, enc >= 8 ? Assembler::REX_B : Assembler::REX);
          }
          // Two opcode bytes 0x0F 0x9C, then the emit_rm byte.
          emit_opcode(cbuf, 0x0F);
          emit_opcode(cbuf, 0x9C);
          emit_rm(cbuf, 0x3, 0x0, enc >= 8 ? enc - 8 : enc);
        %}
 2641 
 2642   enc_class setNZ_reg(rRegI dst)
        %{
          // SETNZ on a register.  Encodings 8 and above get REX_B, encodings 4-7
          // get a plain REX prefix, encodings 0-3 get no prefix.
          // NOTE(review): the plain REX for 4-7 presumably selects the low-byte
          // form of those registers -- confirm against the Intel SDM.
          const int enc = $dst$$reg;
          if (enc >= 4) {
            emit_opcode(cbuf, enc >= 8 ? Assembler::REX_B : Assembler::REX);
          }
          // Two opcode bytes 0x0F 0x95, then the emit_rm byte.
          emit_opcode(cbuf, 0x0F);
          emit_opcode(cbuf, 0x95);
          emit_rm(cbuf, 0x3, 0x0, enc >= 8 ? enc - 8 : enc);
        %}
 2656 
 2657 
 2658   // Compare the longs and set -1, 0, or 1 into dst
 2659   enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
 2660   %{
          // Emits, in order: a 64-bit compare of the two sources, a load of -1
          // into dst, a short jl over the remainder, then setne and movzbl on
          // dst.  Register encodings of 8 and above are expressed through the
          // REX prefix; only the low three bits (enc & 7) go into the opcode
          // and ModRM bytes.
 2661     int src1enc = $src1$$reg;
 2662     int src2enc = $src2$$reg;
 2663     int dstenc = $dst$$reg;
 2664 
 2665     // cmpq $src1, $src2
          // A prefix is always emitted here (W form); src1 >= 8 selects the R
          // variant and src2 >= 8 selects the B variant.
 2666     if (src1enc < 8) {
 2667       if (src2enc < 8) {
 2668         emit_opcode(cbuf, Assembler::REX_W);
 2669       } else {
 2670         emit_opcode(cbuf, Assembler::REX_WB);
 2671       }
 2672     } else {
 2673       if (src2enc < 8) {
 2674         emit_opcode(cbuf, Assembler::REX_WR);
 2675       } else {
 2676         emit_opcode(cbuf, Assembler::REX_WRB);
 2677       }
 2678     }
 2679     emit_opcode(cbuf, 0x3B);
 2680     emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);
 2681 
 2682     // movl $dst, -1
 2683     if (dstenc >= 8) {
 2684       emit_opcode(cbuf, Assembler::REX_B);
 2685     }
 2686     emit_opcode(cbuf, 0xB8 | (dstenc & 7));
 2687     emit_d32(cbuf, -1);
 2688 
 2689     // jl,s done
          // The displacement has to skip the setne and movzbl emitted below:
          // three emit calls each, plus one prefix byte each when dstenc >= 4,
          // hence 0x06 or 0x08 (assuming every emit call writes one byte).
          // Any change to the two instructions below must keep this in sync.
 2690     emit_opcode(cbuf, 0x7C);
 2691     emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
 2692 
 2693     // setne $dst
 2694     if (dstenc >= 4) {
 2695       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
 2696     }
 2697     emit_opcode(cbuf, 0x0F);
 2698     emit_opcode(cbuf, 0x95);
 2699     emit_opcode(cbuf, 0xC0 | (dstenc & 7));
 2700 
 2701     // movzbl $dst, $dst
          // dst is both operands here, so a high dst needs the RB variant.
 2702     if (dstenc >= 4) {
 2703       emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
 2704     }
 2705     emit_opcode(cbuf, 0x0F);
 2706     emit_opcode(cbuf, 0xB6);
 2707     emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
 2708   %}
 2709 
 2710   enc_class Push_ResultXD(regD dst) %{
          // Goes through the MacroAssembler instead of raw byte emission:
          // fstp_d to the stack slot at [rsp + 0], movdbl from that slot into
          // dst's XMM register, then rsp is raised by 8.
          // NOTE(review): this only releases 8 bytes of stack; presumably the
          // matching reservation is the subptr in Push_SrcXD -- confirm.
 2711     MacroAssembler _masm(&cbuf);
 2712     __ fstp_d(Address(rsp, 0));
 2713     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2714     __ addptr(rsp, 8);
 2715   %}
 2716 
 2717   enc_class Push_SrcXD(regD src) %{
          // Goes through the MacroAssembler instead of raw byte emission:
          // rsp is lowered by 8, src's XMM register is stored with movdbl to
          // [rsp + 0], and fld_d is then issued on that same slot.
          // NOTE(review): the 8 bytes are not released here; presumably
          // Push_ResultXD's addptr does that -- confirm.
 2718     MacroAssembler _masm(&cbuf);
 2719     __ subptr(rsp, 8);
 2720     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2721     __ fld_d(Address(rsp, 0));
 2722   %}
 2723 
 2724 
 2725   enc_class enc_rethrow()
 2726   %{
          // Emits opcode 0xE9 followed by a relocated 32-bit displacement to
          // OptoRuntime::rethrow_stub().  The displacement is computed from
          // cbuf.insts_end() after the opcode byte has been emitted, minus 4
          // for the displacement field itself, so statement order matters.
 2727     cbuf.set_insts_mark();
 2728     emit_opcode(cbuf, 0xE9); // jmp entry
 2729     emit_d32_reloc(cbuf,
 2730                    (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
 2731                    runtime_call_Relocation::spec(),
 2732                    RELOC_DISP32);
 2733   %}
 2734 
 2735 %}
 2736 
 2737 
 2738 
 2739 //----------FRAME--------------------------------------------------------------
 2740 // Definition of frame structure and management information.
 2741 //
 2742 //  S T A C K   L A Y O U T    Allocators stack-slot number
 2743 //                             |   (to get allocators register number
 2744 //  G  Owned by    |        |  v    add OptoReg::stack0())
 2745 //  r   CALLER     |        |
 2746 //  o     |        +--------+      pad to even-align allocators stack-slot
 2747 //  w     V        |  pad0  |        numbers; owned by CALLER
 2748 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 2749 //  h     ^        |   in   |  5
 2750 //        |        |  args  |  4   Holes in incoming args owned by SELF
 2751 //  |     |        |        |  3
 2752 //  |     |        +--------+
 2753 //  V     |        | old out|      Empty on Intel, window on Sparc
 2754 //        |    old |preserve|      Must be even aligned.
 2755 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 2756 //        |        |   in   |  3   area for Intel ret address
 2757 //     Owned by    |preserve|      Empty on Sparc.
 2758 //       SELF      +--------+
 2759 //        |        |  pad2  |  2   pad to align old SP
 2760 //        |        +--------+  1
 2761 //        |        | locks  |  0
 2762 //        |        +--------+----> OptoReg::stack0(), even aligned
 2763 //        |        |  pad1  | 11   pad to align new SP
 2764 //        |        +--------+
 2765 //        |        |        | 10
 2766 //        |        | spills |  9   spills
 2767 //        V        |        |  8   (pad0 slot for callee)
 2768 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 2769 //        ^        |  out   |  7
 2770 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 2771 //     Owned by    +--------+
 2772 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 2773 //        |    new |preserve|      Must be even-aligned.
 2774 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 2775 //        |        |        |
 2776 //
 2777 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 2778 //         known from SELF's arguments and the Java calling convention.
 2779 //         Region 6-7 is determined per call site.
 2780 // Note 2: If the calling convention leaves holes in the incoming argument
 2781 //         area, those holes are owned by SELF.  Holes in the outgoing area
 2782 //         are owned by the CALLEE.  Holes should not be necessary in the
 2783 //         incoming area, as the Java calling convention is completely under
 2784 //         the control of the AD file.  Doubles can be sorted and packed to
 2785 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 2786 //         varargs C calling conventions.
 2787 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 2788 //         even aligned with pad0 as needed.
 2789 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 2790 //         region 6-11 is even aligned; it may be padded out more so that
 2791 //         the region from SP to FP meets the minimum stack alignment.
 2792 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 2793 //         alignment.  Region 11, pad1, may be dynamically extended so that
 2794 //         SP meets the minimum alignment.
 2795 
 2796 frame
 2797 %{
 2798   // This register is part of the calling convention
 2799   // between compiled code and the interpreter.
 2800   inline_cache_reg(RAX);                // Inline Cache Register
 2801 
 2802   // Optional: name the operand used by cisc-spilling to access
 2803   // [stack_pointer + offset]
 2804   cisc_spilling_operand_name(indOffset32);
 2805 
 2806   // Number of stack slots consumed by locking an object
 2807   sync_stack_slots(2);
 2808 
 2809   // Compiled code's Frame Pointer
 2810   frame_pointer(RSP);
 2811 
 2812   // Interpreter stores its frame pointer in a register which is
 2813   // stored to the stack by I2CAdaptors.
 2814   // I2CAdaptors convert from interpreted java to compiled java.
 2815   interpreter_frame_pointer(RBP);
 2816 
 2817   // Stack alignment requirement
 2818   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 2819 
 2820   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 2821   // for calls to C.  Supports the var-args backing area for register parms.
 2822   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 2823 
 2824   // The after-PROLOG location of the return address.  Location of
 2825   // return address specifies a type (REG or STACK) and a number
 2826   // representing the register number (i.e. - use a register name) or
 2827   // stack slot.
 2828   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 2829   // Otherwise, it is above the locks and verification slot and alignment word
 2830   return_addr(STACK - 2 +
 2831               align_up((Compile::current()->in_preserve_stack_slots() +
 2832                         Compile::current()->fixed_slots()),
 2833                        stack_alignment_in_slots()));
 2834 
 2835   // Location of compiled Java return values.  Same as C for now.
 2836   return_value
 2837   %{
          // lo[] and hi[] are indexed by ideal_reg and supply the two halves of
          // the OptoRegPair returned below.  The two leading 0 entries cover
          // the indices below the slot labelled Op_RegN.  OptoReg::Bad in hi[]
          // presumably marks kinds that have no second half.
          // NOTE(review): the assert's lower bound is Op_RegI, yet both tables
          // label a slot as Op_RegN at a lower index -- confirm whether
          // Op_RegN is meant to be accepted here.
 2838     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 2839            "only return normal values");
 2840 
 2841     static const int lo[Op_RegL + 1] = {
 2842       0,
 2843       0,
 2844       RAX_num,  // Op_RegN
 2845       RAX_num,  // Op_RegI
 2846       RAX_num,  // Op_RegP
 2847       XMM0_num, // Op_RegF
 2848       XMM0_num, // Op_RegD
 2849       RAX_num   // Op_RegL
 2850     };
 2851     static const int hi[Op_RegL + 1] = {
 2852       0,
 2853       0,
 2854       OptoReg::Bad, // Op_RegN
 2855       OptoReg::Bad, // Op_RegI
 2856       RAX_H_num,    // Op_RegP
 2857       OptoReg::Bad, // Op_RegF
 2858       XMM0b_num,    // Op_RegD
 2859       RAX_H_num     // Op_RegL
 2860     };
 2861     // Excluded flags and vector registers.
          // Only hi[] is size-checked; lo[] is declared with the same bound.
 2862     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 2863     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 2864   %}
 2865 %}
 2866 
 2867 //----------ATTRIBUTES---------------------------------------------------------
 2868 //----------Operand Attributes-------------------------------------------------
 2869 op_attrib op_cost(0);        // Required cost attribute
      // The value above is the default; operand definitions in this file
      // override it with their own op_cost(n) clause.
 2870 
 2871 //----------Instruction Attributes---------------------------------------------
      // Each ins_attrib below declares an instruction attribute together with
      // its default value.
 2872 ins_attrib ins_cost(100);       // Required cost attribute
 2873 ins_attrib ins_size(8);         // Required size attribute (in bits)
 2874 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 2875                                 // a non-matching short branch variant
 2876                                 // of some long branch?
 2877 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 2878                                 // be a power of 2) specifies the
 2879                                 // alignment that some part of the
 2880                                 // instruction (not necessarily the
 2881                                 // start) requires.  If > 1, a
 2882                                 // compute_padding() function must be
 2883                                 // provided for the instruction
 2884 
 2885 //----------OPERANDS-----------------------------------------------------------
 2886 // Operand definitions must precede instruction definitions for correct parsing
 2887 // in the ADLC because operands constitute user defined types which are used in
 2888 // instruction definitions.
 2889 
 2890 //----------Simple Operands----------------------------------------------------
 2891 // Immediate Operands
 2892 // Integer Immediate
 2893 operand immI()
 2894 %{
        // Any int constant: matches ConI with no predicate, at op_cost 10.
 2895   match(ConI);
 2896 
 2897   op_cost(10);
 2898   format %{ %}
 2899   interface(CONST_INTER);
 2900 %}
 2901 
 2902 // Constant for test vs zero
 2903 operand immI_0()
 2904 %{
        // Matches ConI only when its value is exactly 0.
 2905   predicate(n->get_int() == 0);
 2906   match(ConI);
 2907 
 2908   op_cost(0);
 2909   format %{ %}
 2910   interface(CONST_INTER);
 2911 %}
 2912 
 2913 // Constant for increment
 2914 operand immI_1()
 2915 %{
        // Matches ConI only when its value is exactly 1.
 2916   predicate(n->get_int() == 1);
 2917   match(ConI);
 2918 
 2919   op_cost(0);
 2920   format %{ %}
 2921   interface(CONST_INTER);
 2922 %}
 2923 
 2924 // Constant for decrement
 2925 operand immI_M1()
 2926 %{
        // Matches ConI only when its value is exactly -1.
 2927   predicate(n->get_int() == -1);
 2928   match(ConI);
 2929 
 2930   op_cost(0);
 2931   format %{ %}
 2932   interface(CONST_INTER);
 2933 %}
 2934 
 2935 operand immI_2()
 2936 %{
        // Matches ConI only when its value is exactly 2.
 2937   predicate(n->get_int() == 2);
 2938   match(ConI);
 2939 
 2940   op_cost(0);
 2941   format %{ %}
 2942   interface(CONST_INTER);
 2943 %}
 2944 
 2945 operand immI_4()
 2946 %{
        // Matches ConI only when its value is exactly 4.
 2947   predicate(n->get_int() == 4);
 2948   match(ConI);
 2949 
 2950   op_cost(0);
 2951   format %{ %}
 2952   interface(CONST_INTER);
 2953 %}
 2954 
 2955 operand immI_8()
 2956 %{
        // Matches ConI only when its value is exactly 8.
 2957   predicate(n->get_int() == 8);
 2958   match(ConI);
 2959 
 2960   op_cost(0);
 2961   format %{ %}
 2962   interface(CONST_INTER);
 2963 %}
 2964 
 2965 // Valid scale values for addressing modes
 2966 operand immI2()
 2967 %{
        // Matches ConI in the inclusive range 0..3.  No op_cost clause, so the
        // op_attrib default applies.
 2968   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 2969   match(ConI);
 2970 
 2971   format %{ %}
 2972   interface(CONST_INTER);
 2973 %}
 2974 
 2975 operand immU7()
 2976 %{
        // Matches ConI in the inclusive range 0..0x7F (non-negative, 7 bits).
 2977   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 2978   match(ConI);
 2979 
 2980   op_cost(5);
 2981   format %{ %}
 2982   interface(CONST_INTER);
 2983 %}
 2984 
 2985 operand immI8()
 2986 %{
        // Matches ConI with -0x80 <= value < 0x80, i.e. the signed 8-bit range.
 2987   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 2988   match(ConI);
 2989 
 2990   op_cost(5);
 2991   format %{ %}
 2992   interface(CONST_INTER);
 2993 %}
 2994 
 2995 operand immU8()
 2996 %{
        // Matches ConI in the inclusive range 0..255 (unsigned 8-bit range).
 2997   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 2998   match(ConI);
 2999 
 3000   op_cost(5);
 3001   format %{ %}
 3002   interface(CONST_INTER);
 3003 %}
 3004 
 3005 operand immI16()
 3006 %{
        // Matches ConI in the inclusive range -32768..32767 (signed 16-bit).
 3007   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3008   match(ConI);
 3009 
 3010   op_cost(10);
 3011   format %{ %}
 3012   interface(CONST_INTER);
 3013 %}
 3014 
 3015 // Int Immediate non-negative
 3016 operand immU31()
 3017 %{
        // Matches ConI whose value is zero or positive.
 3018   predicate(n->get_int() >= 0);
 3019   match(ConI);
 3020 
 3021   op_cost(0);
 3022   format %{ %}
 3023   interface(CONST_INTER);
 3024 %}
 3025 
 3026 // Constant for long shifts
 3027 operand immI_32()
 3028 %{
        // Matches ConI only when its value is exactly 32.
 3029   predicate( n->get_int() == 32 );
 3030   match(ConI);
 3031 
 3032   op_cost(0);
 3033   format %{ %}
 3034   interface(CONST_INTER);
 3035 %}
 3036 
 3037 // Constant for long shifts
 3038 operand immI_64()
 3039 %{
        // Matches ConI only when its value is exactly 64.
 3040   predicate( n->get_int() == 64 );
 3041   match(ConI);
 3042 
 3043   op_cost(0);
 3044   format %{ %}
 3045   interface(CONST_INTER);
 3046 %}
 3047 
 3048 // Pointer Immediate
 3049 operand immP()
 3050 %{
        // Any pointer constant: matches ConP with no predicate, at op_cost 10.
 3051   match(ConP);
 3052 
 3053   op_cost(10);
 3054   format %{ %}
 3055   interface(CONST_INTER);
 3056 %}
 3057 
 3058 // NULL Pointer Immediate
 3059 operand immP0()
 3060 %{
        // Matches ConP only when get_ptr() is 0.
 3061   predicate(n->get_ptr() == 0);
 3062   match(ConP);
 3063 
 3064   op_cost(5);
 3065   format %{ %}
 3066   interface(CONST_INTER);
 3067 %}
 3068 
 3069 // Narrow Pointer Immediate
 3070 operand immN() %{
        // Any narrow-pointer constant: matches ConN with no predicate.
 3071   match(ConN);
 3072 
 3073   op_cost(10);
 3074   format %{ %}
 3075   interface(CONST_INTER);
 3076 %}
 3077 
 3078 operand immNKlass() %{
        // Any ConNKlass constant; no predicate, at op_cost 10.
 3079   match(ConNKlass);
 3080 
 3081   op_cost(10);
 3082   format %{ %}
 3083   interface(CONST_INTER);
 3084 %}
 3085 
 3086 // Narrow NULL Pointer Immediate
 3087 operand immN0() %{
        // Matches ConN only when get_narrowcon() is 0.
 3088   predicate(n->get_narrowcon() == 0);
 3089   match(ConN);
 3090 
 3091   op_cost(5);
 3092   format %{ %}
 3093   interface(CONST_INTER);
 3094 %}
 3095 
 3096 operand immP31()
 3097 %{
        // Matches ConP only when the constant's type carries no relocation
        // (relocInfo::none) and get_ptr() >> 31 is 0, i.e. nothing is set at or
        // above bit 31.
 3098   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 3099             && (n->get_ptr() >> 31) == 0);
 3100   match(ConP);
 3101 
 3102   op_cost(5);
 3103   format %{ %}
 3104   interface(CONST_INTER);
 3105 %}
 3106 
 3107 
 3108 // Long Immediate
 3109 operand immL()
 3110 %{
        // Any long constant: matches ConL with no predicate, at op_cost 20.
 3111   match(ConL);
 3112 
 3113   op_cost(20);
 3114   format %{ %}
 3115   interface(CONST_INTER);
 3116 %}
 3117 
 3118 // Long Immediate 8-bit
 3119 operand immL8()
 3120 %{
        // Matches ConL with -0x80 <= value < 0x80, i.e. the signed 8-bit range.
 3121   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 3122   match(ConL);
 3123 
 3124   op_cost(5);
 3125   format %{ %}
 3126   interface(CONST_INTER);
 3127 %}
 3128 
 3129 // Long Immediate 32-bit unsigned
 3130 operand immUL32()
 3131 %{
        // Matches ConL whose value is unchanged by a cast to unsigned int,
        // i.e. it is representable as an unsigned 32-bit quantity.
 3132   predicate(n->get_long() == (unsigned int) (n->get_long()));
 3133   match(ConL);
 3134 
 3135   op_cost(10);
 3136   format %{ %}
 3137   interface(CONST_INTER);
 3138 %}
 3139 
 3140 // Long Immediate 32-bit signed
 3141 operand immL32()
 3142 %{
        // Matches ConL whose value is unchanged by a cast to int, i.e. it is
        // representable as a signed 32-bit quantity.
 3143   predicate(n->get_long() == (int) (n->get_long()));
 3144   match(ConL);
 3145 
 3146   op_cost(15);
 3147   format %{ %}
 3148   interface(CONST_INTER);
 3149 %}
 3150 
 3151 operand immL_Pow2()
 3152 %{
        // Matches ConL for which is_power_of_2 holds on the value taken as an
        // unsigned 64-bit quantity (julong).
 3153   predicate(is_power_of_2((julong)n->get_long()));
 3154   match(ConL);
 3155 
 3156   op_cost(15);
 3157   format %{ %}
 3158   interface(CONST_INTER);
 3159 %}
 3160 
 3161 operand immL_NotPow2()
 3162 %{
        // Matches ConL whose bitwise complement, taken as julong, satisfies
        // is_power_of_2.
 3163   predicate(is_power_of_2((julong)~n->get_long()));
 3164   match(ConL);
 3165 
 3166   op_cost(15);
 3167   format %{ %}
 3168   interface(CONST_INTER);
 3169 %}
 3170 
 3171 // Long Immediate zero
 3172 operand immL0()
 3173 %{
        // Matches ConL only when its value is exactly 0.
 3174   predicate(n->get_long() == 0L);
 3175   match(ConL);
 3176 
 3177   op_cost(10);
 3178   format %{ %}
 3179   interface(CONST_INTER);
 3180 %}
 3181 
 3182 // Constant for increment
 3183 operand immL1()
 3184 %{
        // Matches ConL only when its value is exactly 1.  No op_cost clause,
        // so the op_attrib default applies.
 3185   predicate(n->get_long() == 1);
 3186   match(ConL);
 3187 
 3188   format %{ %}
 3189   interface(CONST_INTER);
 3190 %}
 3191 
 3192 // Constant for decrement
 3193 operand immL_M1()
 3194 %{
        // Matches ConL only when its value is exactly -1.  No op_cost clause,
        // so the op_attrib default applies.
 3195   predicate(n->get_long() == -1);
 3196   match(ConL);
 3197 
 3198   format %{ %}
 3199   interface(CONST_INTER);
 3200 %}
 3201 
 3202 // Long Immediate: the value 10
 3203 operand immL10()
 3204 %{
        // Matches ConL only when its value is exactly 10.  No op_cost clause,
        // so the op_attrib default applies.
 3205   predicate(n->get_long() == 10);
 3206   match(ConL);
 3207 
 3208   format %{ %}
 3209   interface(CONST_INTER);
 3210 %}
 3211 
 3212 // Long immediate from 0 to 127.
 3213 // Used for a shorter form of long mul by 10.
 3214 operand immL_127()
 3215 %{
        // Matches ConL with 0 <= value < 0x80.
 3216   predicate(0 <= n->get_long() && n->get_long() < 0x80);
 3217   match(ConL);
 3218 
 3219   op_cost(10);
 3220   format %{ %}
 3221   interface(CONST_INTER);
 3222 %}
 3223 
 3224 // Long Immediate: low 32-bit mask
 3225 operand immL_32bits()
 3226 %{
        // Matches ConL only when its value is exactly 0xFFFFFFFF (the low 32
        // bits set, the high 32 bits clear).
 3227   predicate(n->get_long() == 0xFFFFFFFFL);
 3228   match(ConL);
 3229   op_cost(20);
 3230 
 3231   format %{ %}
 3232   interface(CONST_INTER);
 3233 %}
 3234 
 3235 // Int Immediate: 2^n-1, positive
 3236 operand immI_Pow2M1()
 3237 %{
        // Matches ConI that is strictly positive and whose successor
        // (value + 1) satisfies is_power_of_2.
 3238   predicate((n->get_int() > 0)
 3239             && is_power_of_2(n->get_int() + 1));
 3240   match(ConI);
 3241 
 3242   op_cost(20);
 3243   format %{ %}
 3244   interface(CONST_INTER);
 3245 %}
 3246 
 3247 // Float Immediate zero
 3248 operand immF0()
 3249 %{
        // Matches ConF when jint_cast of the float value equals 0.
        // NOTE(review): jint_cast presumably reinterprets the float's bits, in
        // which case only +0.0f matches and -0.0f does not -- confirm.
 3250   predicate(jint_cast(n->getf()) == 0);
 3251   match(ConF);
 3252 
 3253   op_cost(5);
 3254   format %{ %}
 3255   interface(CONST_INTER);
 3256 %}
 3257 
 3258 // Float Immediate
 3259 operand immF()
 3260 %{
        // Any float constant: matches ConF with no predicate, at op_cost 15.
 3261   match(ConF);
 3262 
 3263   op_cost(15);
 3264   format %{ %}
 3265   interface(CONST_INTER);
 3266 %}
 3267 
 3268 // Double Immediate zero
 3269 operand immD0()
 3270 %{
        // Matches ConD when jlong_cast of the double value equals 0.
        // NOTE(review): jlong_cast presumably reinterprets the double's bits,
        // in which case only +0.0 matches and -0.0 does not -- confirm.
 3271   predicate(jlong_cast(n->getd()) == 0);
 3272   match(ConD);
 3273 
 3274   op_cost(5);
 3275   format %{ %}
 3276   interface(CONST_INTER);
 3277 %}
 3278 
 3279 // Double Immediate
 3280 operand immD()
 3281 %{
        // Any double constant: matches ConD with no predicate, at op_cost 15.
 3282   match(ConD);
 3283 
 3284   op_cost(15);
 3285   format %{ %}
 3286   interface(CONST_INTER);
 3287 %}
 3288 
 3289 // Immediates for special shifts (sign extend)
 3290 
 3291 // Shift-count constants (16 and 24)
 3292 operand immI_16()
 3293 %{
        // Matches ConI only when its value is exactly 16.  No op_cost clause,
        // so the op_attrib default applies.
 3294   predicate(n->get_int() == 16);
 3295   match(ConI);
 3296 
 3297   format %{ %}
 3298   interface(CONST_INTER);
 3299 %}
 3300 
 3301 operand immI_24()
 3302 %{
        // Matches ConI only when its value is exactly 24.  No op_cost clause,
        // so the op_attrib default applies.
 3303   predicate(n->get_int() == 24);
 3304   match(ConI);
 3305 
 3306   format %{ %}
 3307   interface(CONST_INTER);
 3308 %}
 3309 
 3310 // Constant for byte-wide masking
 3311 operand immI_255()
 3312 %{
        // Matches ConI only when its value is exactly 255 (0xFF).
 3313   predicate(n->get_int() == 255);
 3314   match(ConI);
 3315 
 3316   format %{ %}
 3317   interface(CONST_INTER);
 3318 %}
 3319 
 3320 // Constant for short-wide masking
 3321 operand immI_65535()
 3322 %{
        // Matches ConI only when its value is exactly 65535 (0xFFFF).
 3323   predicate(n->get_int() == 65535);
 3324   match(ConI);
 3325 
 3326   format %{ %}
 3327   interface(CONST_INTER);
 3328 %}
 3329 
 3330 // Constant for byte-wide masking
 3331 operand immL_255()
 3332 %{
        // Matches ConL only when its value is exactly 255 (0xFF).
 3333   predicate(n->get_long() == 255);
 3334   match(ConL);
 3335 
 3336   format %{ %}
 3337   interface(CONST_INTER);
 3338 %}
 3339 
 3340 // Constant for short-wide masking
 3341 operand immL_65535()
 3342 %{
        // Matches ConL only when its value is exactly 65535 (0xFFFF).
 3343   predicate(n->get_long() == 65535);
 3344   match(ConL);
 3345 
 3346   format %{ %}
 3347   interface(CONST_INTER);
 3348 %}
 3349 
 3350 operand kReg()
 3351 %{
        // Register operand for RegVectMask values, allocated from register
        // class vectmask_reg.
 3352   constraint(ALLOC_IN_RC(vectmask_reg));
 3353   match(RegVectMask);
 3354   format %{%}
 3355   interface(REG_INTER);
 3356 %}
 3357 
 3358 operand kReg_K1()
 3359 %{
        // RegVectMask operand restricted to register class vectmask_reg_K1.
 3360   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3361   match(RegVectMask);
 3362   format %{%}
 3363   interface(REG_INTER);
 3364 %}
 3365 
 3366 operand kReg_K2()
 3367 %{
        // RegVectMask operand restricted to register class vectmask_reg_K2.
 3368   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3369   match(RegVectMask);
 3370   format %{%}
 3371   interface(REG_INTER);
 3372 %}
 3373 
 3374 // Special Registers
 3375 operand kReg_K3()
 3376 %{
        // RegVectMask operand restricted to register class vectmask_reg_K3.
 3377   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3378   match(RegVectMask);
 3379   format %{%}
 3380   interface(REG_INTER);
 3381 %}
 3382 
 3383 operand kReg_K4()
 3384 %{
        // RegVectMask operand restricted to register class vectmask_reg_K4.
 3385   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3386   match(RegVectMask);
 3387   format %{%}
 3388   interface(REG_INTER);
 3389 %}
 3390 
 3391 operand kReg_K5()
 3392 %{
        // RegVectMask operand restricted to register class vectmask_reg_K5.
 3393   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3394   match(RegVectMask);
 3395   format %{%}
 3396   interface(REG_INTER);
 3397 %}
 3398 
 3399 operand kReg_K6()
 3400 %{
        // RegVectMask operand restricted to register class vectmask_reg_K6.
 3401   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3402   match(RegVectMask);
 3403   format %{%}
 3404   interface(REG_INTER);
 3405 %}
 3406 
 3407 // Special Registers
 3408 operand kReg_K7()
 3409 %{
        // RegVectMask operand restricted to register class vectmask_reg_K7.
 3410   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3411   match(RegVectMask);
 3412   format %{%}
 3413   interface(REG_INTER);
 3414 %}
 3415 
 3416 // Register Operands
 3417 // Integer Register
 3418 operand rRegI()
 3419 %{
        // General int register operand, allocated from class int_reg.
 3420   constraint(ALLOC_IN_RC(int_reg));
 3421   match(RegI);
 3422 
        // The single-register int operands are accepted wherever rRegI is.
 3423   match(rax_RegI);
 3424   match(rbx_RegI);
 3425   match(rcx_RegI);
 3426   match(rdx_RegI);
 3427   match(rdi_RegI);
 3428 
 3429   format %{ %}
 3430   interface(REG_INTER);
 3431 %}
 3432 
 3433 // Special Registers
 3434 operand rax_RegI()
 3435 %{
        // Int operand allocated from class int_rax_reg; match rules are RegI
        // and rRegI.  Printed as "RAX".
 3436   constraint(ALLOC_IN_RC(int_rax_reg));
 3437   match(RegI);
 3438   match(rRegI);
 3439 
 3440   format %{ "RAX" %}
 3441   interface(REG_INTER);
 3442 %}
 3443 
 3444 // Special Registers
 3445 operand rbx_RegI()
 3446 %{
        // Int operand allocated from class int_rbx_reg; match rules are RegI
        // and rRegI.  Printed as "RBX".
 3447   constraint(ALLOC_IN_RC(int_rbx_reg));
 3448   match(RegI);
 3449   match(rRegI);
 3450 
 3451   format %{ "RBX" %}
 3452   interface(REG_INTER);
 3453 %}
 3454 
 3455 operand rcx_RegI()
 3456 %{
        // Int operand allocated from class int_rcx_reg; match rules are RegI
        // and rRegI.  Printed as "RCX".
 3457   constraint(ALLOC_IN_RC(int_rcx_reg));
 3458   match(RegI);
 3459   match(rRegI);
 3460 
 3461   format %{ "RCX" %}
 3462   interface(REG_INTER);
 3463 %}
 3464 
 3465 operand rdx_RegI()
 3466 %{
        // Int operand allocated from class int_rdx_reg; match rules are RegI
        // and rRegI.  Printed as "RDX".
 3467   constraint(ALLOC_IN_RC(int_rdx_reg));
 3468   match(RegI);
 3469   match(rRegI);
 3470 
 3471   format %{ "RDX" %}
 3472   interface(REG_INTER);
 3473 %}
 3474 
 3475 operand rdi_RegI()
 3476 %{
        // Int operand allocated from class int_rdi_reg; match rules are RegI
        // and rRegI.  Printed as "RDI".
 3477   constraint(ALLOC_IN_RC(int_rdi_reg));
 3478   match(RegI);
 3479   match(rRegI);
 3480 
 3481   format %{ "RDI" %}
 3482   interface(REG_INTER);
 3483 %}
 3484 
 3485 operand no_rax_rdx_RegI()
 3486 %{
        // Int operand allocated from class int_no_rax_rdx_reg.  Of the
        // single-register operands only rbx, rcx and rdi are listed as match
        // rules; rax_RegI, rdx_RegI and the general rRegI are not.
 3487   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 3488   match(RegI);
 3489   match(rbx_RegI);
 3490   match(rcx_RegI);
 3491   match(rdi_RegI);
 3492 
 3493   format %{ %}
 3494   interface(REG_INTER);
 3495 %}
 3496 
 3497 operand no_rbp_r13_RegI()
 3498 %{
        // Int operand allocated from class int_no_rbp_r13_reg.  Match rules
        // cover RegI, the general rRegI and every single-register int operand.
 3499   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 3500   match(RegI);
 3501   match(rRegI);
 3502   match(rax_RegI);
 3503   match(rbx_RegI);
 3504   match(rcx_RegI);
 3505   match(rdx_RegI);
 3506   match(rdi_RegI);
 3507 
 3508   format %{ %}
 3509   interface(REG_INTER);
 3510 %}
 3511 
 3512 // Pointer Register
 3513 operand any_RegP()
 3514 %{
        // Pointer operand allocated from class any_reg.  Match rules cover
        // RegP, each single-register pointer operand and the general rRegP.
 3515   constraint(ALLOC_IN_RC(any_reg));
 3516   match(RegP);
 3517   match(rax_RegP);
 3518   match(rbx_RegP);
 3519   match(rdi_RegP);
 3520   match(rsi_RegP);
 3521   match(rbp_RegP);
 3522   match(r15_RegP);
 3523   match(rRegP);
 3524 
 3525   format %{ %}
 3526   interface(REG_INTER);
 3527 %}
 3528 
 3529 operand rRegP()
 3530 %{
        // General pointer register operand, allocated from class ptr_reg.
        // Match rules cover RegP and each single-register pointer operand.
 3531   constraint(ALLOC_IN_RC(ptr_reg));
 3532   match(RegP);
 3533   match(rax_RegP);
 3534   match(rbx_RegP);
 3535   match(rdi_RegP);
 3536   match(rsi_RegP);
 3537   match(rbp_RegP);  // See Q&A below about
 3538   match(r15_RegP);  // r15_RegP and rbp_RegP.
 3539 
 3540   format %{ %}
 3541   interface(REG_INTER);
 3542 %}
 3543 
 3544 operand rRegN() %{
        // Narrow-pointer register operand: matches RegN and is allocated from
        // int_reg, the same register class rRegI uses.
 3545   constraint(ALLOC_IN_RC(int_reg));
 3546   match(RegN);
 3547 
 3548   format %{ %}
 3549   interface(REG_INTER);
 3550 %}
 3551 
 3552 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 3553 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 3554 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 3555 // The output of an instruction is controlled by the allocator, which respects
 3556 // register class masks, not match rules.  Unless an instruction mentions
 3557 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 3558 // by the allocator as an input.
 3559 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 3560 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 3561 // result, RBP is not included in the output of the instruction either.
 3562 
 3563 operand no_rax_RegP()
 3564 %{
        // Pointer operand allocated from class ptr_no_rax_reg.  Match rules
        // are RegP plus the rbx, rsi and rdi single-register operands.
 3565   constraint(ALLOC_IN_RC(ptr_no_rax_reg));
 3566   match(RegP);
 3567   match(rbx_RegP);
 3568   match(rsi_RegP);
 3569   match(rdi_RegP);
 3570 
 3571   format %{ %}
 3572   interface(REG_INTER);
 3573 %}
 3574 
 3575 // This operand is not allowed to use RBP even if
 3576 // RBP is not used to hold the frame pointer.
 3577 operand no_rbp_RegP()
 3578 %{
        // Pointer operand allocated from class ptr_reg_no_rbp.  Match rules
        // are RegP plus the rbx, rsi and rdi single-register operands.
 3579   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 3580   match(RegP);
 3581   match(rbx_RegP);
 3582   match(rsi_RegP);
 3583   match(rdi_RegP);
 3584 
 3585   format %{ %}
 3586   interface(REG_INTER);
 3587 %}
 3588 
 3589 operand no_rax_rbx_RegP()
 3590 %{
        // Pointer operand allocated from class ptr_no_rax_rbx_reg.  Match
        // rules are RegP plus the rsi and rdi single-register operands.
 3591   constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
 3592   match(RegP);
 3593   match(rsi_RegP);
 3594   match(rdi_RegP);
 3595 
 3596   format %{ %}
 3597   interface(REG_INTER);
 3598 %}
 3599 
 3600 // Special Registers
 3601 // Return a pointer value
 3602 operand rax_RegP()
 3603 %{
        // Pointer operand allocated from class ptr_rax_reg; match rules are
        // RegP and rRegP.
 3604   constraint(ALLOC_IN_RC(ptr_rax_reg));
 3605   match(RegP);
 3606   match(rRegP);
 3607 
 3608   format %{ %}
 3609   interface(REG_INTER);
 3610 %}
 3611 
 3612 // Special Registers
 3613 // Return a compressed pointer value
 3614 operand rax_RegN()
 3615 %{
        // Narrow-pointer operand allocated from class int_rax_reg (the class
        // rax_RegI uses); match rules are RegN and rRegN.
 3616   constraint(ALLOC_IN_RC(int_rax_reg));
 3617   match(RegN);
 3618   match(rRegN);
 3619 
 3620   format %{ %}
 3621   interface(REG_INTER);
 3622 %}
 3623 
 3624 // Used in AtomicAdd
 3625 operand rbx_RegP()
 3626 %{
        // Pointer operand allocated from class ptr_rbx_reg; match rules are
        // RegP and rRegP.
 3627   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 3628   match(RegP);
 3629   match(rRegP);
 3630 
 3631   format %{ %}
 3632   interface(REG_INTER);
 3633 %}
 3634 
 3635 operand rsi_RegP()
 3636 %{
        // Pointer operand allocated from class ptr_rsi_reg; match rules are
        // RegP and rRegP.
 3637   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 3638   match(RegP);
 3639   match(rRegP);
 3640 
 3641   format %{ %}
 3642   interface(REG_INTER);
 3643 %}
 3644 
 3645 operand rbp_RegP()
 3646 %{
        // Pointer operand allocated from class ptr_rbp_reg; match rules are
        // RegP and rRegP.
 3647   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 3648   match(RegP);
 3649   match(rRegP);
 3650 
 3651   format %{ %}
 3652   interface(REG_INTER);
 3653 %}
 3654 
 3655 // Pointer operand pinned to register class ptr_rdi_reg. Used in rep stosq
 3656 operand rdi_RegP()
 3657 %{
 3658   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 3659   match(RegP);
 3660   match(rRegP);  // a general pointer register operand is accepted as well
 3661 
 3662   format %{ %}
 3663   interface(REG_INTER);
 3664 %}
 3665 
 3666 operand r15_RegP()  // pointer operand pinned to register class ptr_r15_reg
 3667 %{
 3668   constraint(ALLOC_IN_RC(ptr_r15_reg));
 3669   match(RegP);
 3670   match(rRegP);  // a general pointer register operand is accepted as well
 3671 
 3672   format %{ %}
 3673   interface(REG_INTER);
 3674 %}
 3675 
 3676 operand rRegL()  // general long operand constrained to register class long_reg
 3677 %{
 3678   constraint(ALLOC_IN_RC(long_reg));
 3679   match(RegL);
 3680   match(rax_RegL);  // the fixed rax/rdx long operands are accepted as well
 3681   match(rdx_RegL);
 3682 
 3683   format %{ %}
 3684   interface(REG_INTER);
 3685 %}
 3686 
 3687 // Special Registers: long operand constrained to register class long_no_rax_rdx_reg
 3688 operand no_rax_rdx_RegL()
 3689 %{
 3690   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 3691   match(RegL);
 3692   match(rRegL);  // a general long register operand is accepted as well
 3693 
 3694   format %{ %}
 3695   interface(REG_INTER);
 3696 %}
 3697 
 3698 operand rax_RegL()  // long operand pinned to register class long_rax_reg; printed as "RAX"
 3699 %{
 3700   constraint(ALLOC_IN_RC(long_rax_reg));
 3701   match(RegL);
 3702   match(rRegL);  // a general long register operand is accepted as well
 3703 
 3704   format %{ "RAX" %}
 3705   interface(REG_INTER);
 3706 %}
 3707 
 3708 operand rcx_RegL()  // long operand pinned to register class long_rcx_reg
 3709 %{
 3710   constraint(ALLOC_IN_RC(long_rcx_reg));
 3711   match(RegL);
 3712   match(rRegL);  // a general long register operand is accepted as well
 3713 
 3714   format %{ %}
 3715   interface(REG_INTER);
 3716 %}
 3717 
 3718 operand rdx_RegL()  // long operand pinned to register class long_rdx_reg
 3719 %{
 3720   constraint(ALLOC_IN_RC(long_rdx_reg));
 3721   match(RegL);
 3722   match(rRegL);  // a general long register operand is accepted as well
 3723 
 3724   format %{ %}
 3725   interface(REG_INTER);
 3726 %}
 3727 
 3728 operand no_rbp_r13_RegL()  // long operand constrained to register class long_no_rbp_r13_reg
 3729 %{
 3730   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 3731   match(RegL);
 3732   match(rRegL);     // general and fixed (rax/rcx/rdx) long operands are accepted as well
 3733   match(rax_RegL);
 3734   match(rcx_RegL);
 3735   match(rdx_RegL);
 3736 
 3737   format %{ %}
 3738   interface(REG_INTER);
 3739 %}
 3740 
 3741 // Flags register (register class int_flags), used as output of compare instructions
 3742 operand rFlagsReg()
 3743 %{
 3744   constraint(ALLOC_IN_RC(int_flags));
 3745   match(RegFlags);
 3746 
 3747   format %{ "RFLAGS" %}
 3748   interface(REG_INTER);
 3749 %}
 3750 
 3751 // Flags register (same int_flags class as rFlagsReg), used as output of FLOATING POINT compare instructions
 3752 operand rFlagsRegU()
 3753 %{
 3754   constraint(ALLOC_IN_RC(int_flags));
 3755   match(RegFlags);
 3756 
 3757   format %{ "RFLAGS_U" %}
 3758   interface(REG_INTER);
 3759 %}
 3760 
 3761 operand rFlagsRegUCF() %{  // flags operand in class int_flags, printed as "RFLAGS_U_CF"
 3762   constraint(ALLOC_IN_RC(int_flags));
 3763   match(RegFlags);
 3764   predicate(false);  // NOTE(review): presumably keeps the matcher from selecting this operand by itself - confirm
 3765 
 3766   format %{ "RFLAGS_U_CF" %}
 3767   interface(REG_INTER);
 3768 %}
 3769 
 3770 // Float register operand, constrained to register class float_reg
 3771 operand regF() %{
 3772    constraint(ALLOC_IN_RC(float_reg));
 3773    match(RegF);
 3774 
 3775    format %{ %}
 3776    interface(REG_INTER);
 3777 %}
 3778 
 3779 // Float register operand, constrained to register class float_reg_legacy
 3780 operand legRegF() %{
 3781    constraint(ALLOC_IN_RC(float_reg_legacy));
 3782    match(RegF);
 3783 
 3784    format %{ %}
 3785    interface(REG_INTER);
 3786 %}
 3787 
 3788 // Float register operand, constrained to register class float_reg_vl
 3789 operand vlRegF() %{
 3790    constraint(ALLOC_IN_RC(float_reg_vl));
 3791    match(RegF);
 3792 
 3793    format %{ %}
 3794    interface(REG_INTER);
 3795 %}
 3796 
 3797 // Double register operand, constrained to register class double_reg
 3798 operand regD() %{
 3799    constraint(ALLOC_IN_RC(double_reg));
 3800    match(RegD);
 3801 
 3802    format %{ %}
 3803    interface(REG_INTER);
 3804 %}
 3805 
 3806 // Double register operand, constrained to register class double_reg_legacy
 3807 operand legRegD() %{
 3808    constraint(ALLOC_IN_RC(double_reg_legacy));
 3809    match(RegD);
 3810 
 3811    format %{ %}
 3812    interface(REG_INTER);
 3813 %}
 3814 
 3815 // Double register operand, constrained to register class double_reg_vl
 3816 operand vlRegD() %{
 3817    constraint(ALLOC_IN_RC(double_reg_vl));
 3818    match(RegD);
 3819 
 3820    format %{ %}
 3821    interface(REG_INTER);
 3822 %}
 3823 
 3824 //----------Memory Operands----------------------------------------------------
 3825 // Direct Memory Operand
 3826 // operand direct(immP addr)
 3827 // %{
 3828 //   match(addr);
 3829 
 3830 //   format %{ "[$addr]" %}
 3831 //   interface(MEMORY_INTER) %{
 3832 //     base(0xFFFFFFFF);
 3833 //     index(0x4);
 3834 //     scale(0x0);
 3835 //     disp($addr);
 3836 //   %}
 3837 // %}
 3838 
 3839 // Indirect Memory Operand: address is the pointer register itself, [reg]
 3840 operand indirect(any_RegP reg)
 3841 %{
 3842   constraint(ALLOC_IN_RC(ptr_reg));
 3843   match(reg);
 3844 
 3845   format %{ "[$reg]" %}
 3846   interface(MEMORY_INTER) %{
 3847     base($reg);
 3848     index(0x4);  // No Index
 3849     scale(0x0);  // No Scale
 3850     disp(0x0);   // No Displacement
 3851   %}
 3852 %}
 3853 
 3854 // Indirect Memory Plus Short Offset Operand: [reg + off], off is an immL8
 3855 operand indOffset8(any_RegP reg, immL8 off)
 3856 %{
 3857   constraint(ALLOC_IN_RC(ptr_reg));
 3858   match(AddP reg off);
 3859 
 3860   format %{ "[$reg + $off (8-bit)]" %}
 3861   interface(MEMORY_INTER) %{
 3862     base($reg);
 3863     index(0x4);  // No Index
 3864     scale(0x0);  // No Scale
 3865     disp($off);
 3866   %}
 3867 %}
 3868 
 3869 // Indirect Memory Plus Long Offset Operand: [reg + off], off is an immL32
 3870 operand indOffset32(any_RegP reg, immL32 off)
 3871 %{
 3872   constraint(ALLOC_IN_RC(ptr_reg));
 3873   match(AddP reg off);
 3874 
 3875   format %{ "[$reg + $off (32-bit)]" %}
 3876   interface(MEMORY_INTER) %{
 3877     base($reg);
 3878     index(0x4);  // No Index
 3879     scale(0x0);  // No Scale
 3880     disp($off);
 3881   %}
 3882 %}
 3883 
 3884 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + off]
 3885 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 3886 %{
 3887   constraint(ALLOC_IN_RC(ptr_reg));
 3888   match(AddP (AddP reg lreg) off);
 3889 
 3890   op_cost(10);
 3891   format %{"[$reg + $off + $lreg]" %}
 3892   interface(MEMORY_INTER) %{
 3893     base($reg);
 3894     index($lreg);
 3895     scale(0x0);  // No Scale
 3896     disp($off);
 3897   %}
 3898 %}
 3899 
 3900 // Indirect Memory Plus Index Register Operand: [reg + lreg] (no offset)
 3901 operand indIndex(any_RegP reg, rRegL lreg)
 3902 %{
 3903   constraint(ALLOC_IN_RC(ptr_reg));
 3904   match(AddP reg lreg);
 3905 
 3906   op_cost(10);
 3907   format %{"[$reg + $lreg]" %}
 3908   interface(MEMORY_INTER) %{
 3909     base($reg);
 3910     index($lreg);
 3911     scale(0x0);  // No Scale
 3912     disp(0x0);   // No Displacement
 3913   %}
 3914 %}
 3915 
 3916 // Indirect Memory Plus Scaled Index Register Operand: [reg + lreg << scale]
 3917 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 3918 %{
 3919   constraint(ALLOC_IN_RC(ptr_reg));
 3920   match(AddP reg (LShiftL lreg scale));
 3921 
 3922   op_cost(10);
 3923   format %{"[$reg + $lreg << $scale]" %}
 3924   interface(MEMORY_INTER) %{
 3925     base($reg);
 3926     index($lreg);
 3927     scale($scale);
 3928     disp(0x0);   // No Displacement
 3929   %}
 3930 %}
 3931 
 3932 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)  // [reg + idx << scale], int index widened by ConvI2L
 3933 %{
 3934   constraint(ALLOC_IN_RC(ptr_reg));
 3935   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3936   match(AddP reg (LShiftL (ConvI2L idx) scale));  // predicate above: long type's lower bound must be >= 0 (presumably the ConvI2L node)
 3937 
 3938   op_cost(10);
 3939   format %{"[$reg + pos $idx << $scale]" %}
 3940   interface(MEMORY_INTER) %{
 3941     base($reg);
 3942     index($idx);
 3943     scale($scale);
 3944     disp(0x0);   // No Displacement
 3945   %}
 3946 %}
 3947 
 3948 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: [reg + lreg << scale + off]
 3949 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 3950 %{
 3951   constraint(ALLOC_IN_RC(ptr_reg));
 3952   match(AddP (AddP reg (LShiftL lreg scale)) off);
 3953 
 3954   op_cost(10);
 3955   format %{"[$reg + $off + $lreg << $scale]" %}
 3956   interface(MEMORY_INTER) %{
 3957     base($reg);
 3958     index($lreg);
 3959     scale($scale);
 3960     disp($off);
 3961   %}
 3962 %}
 3963 
 3964 // Indirect Memory Plus Positive Index Register Plus Offset Operand: [reg + idx + off], idx widened by ConvI2L
 3965 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 3966 %{
 3967   constraint(ALLOC_IN_RC(ptr_reg));
 3968   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 3969   match(AddP (AddP reg (ConvI2L idx)) off);  // predicate above: long type's lower bound must be >= 0 (presumably the ConvI2L node)
 3970 
 3971   op_cost(10);
 3972   format %{"[$reg + $off + $idx]" %}
 3973   interface(MEMORY_INTER) %{
 3974     base($reg);
 3975     index($idx);
 3976     scale(0x0);  // No Scale
 3977     disp($off);
 3978   %}
 3979 %}
 3980 
 3981 // Indirect Memory Plus Scaled Positive Index Register Plus Offset Operand: [reg + idx << scale + off]
 3982 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 3983 %{
 3984   constraint(ALLOC_IN_RC(ptr_reg));
 3985   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 3986   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);  // predicate above: long type's lower bound must be >= 0 (presumably the ConvI2L node)
 3987 
 3988   op_cost(10);
 3989   format %{"[$reg + $off + $idx << $scale]" %}
 3990   interface(MEMORY_INTER) %{
 3991     base($reg);
 3992     index($idx);
 3993     scale($scale);
 3994     disp($off);
 3995   %}
 3996 %}
 3997 
 3998 // Indirect Narrow Oop Operand: [R12 + reg << 3]; only when compressed oops are in use with a shift of times_8
 3999 operand indCompressedOop(rRegN reg) %{
 4000   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4001   constraint(ALLOC_IN_RC(ptr_reg));
 4002   match(DecodeN reg);
 4003 
 4004   op_cost(10);
 4005   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
 4006   interface(MEMORY_INTER) %{
 4007     base(0xc); // R12
 4008     index($reg);
 4009     scale(0x3);  // matches the "<< 3" in the format string
 4010     disp(0x0);   // No Displacement
 4011   %}
 4012 %}
 4013 
 4014 // Indirect Narrow Oop Plus Offset Operand: [R12 + reg << 3 + off]
 4015 // Note: the x86 architecture doesn't support "scale * index + offset" without a base,
 4016 // so we can't free r12 even with CompressedOops::base() == NULL.
 4017 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 4018   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 4019   constraint(ALLOC_IN_RC(ptr_reg));
 4020   match(AddP (DecodeN reg) off);
 4021 
 4022   op_cost(10);
 4023   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 4024   interface(MEMORY_INTER) %{
 4025     base(0xc); // R12
 4026     index($reg);
 4027     scale(0x3);  // matches the "<< 3" in the format string
 4028     disp($off);
 4029   %}
 4030 %}
 4031 
 4032 // Indirect Memory Operand on a narrow oop: [reg]; only when CompressedOops::shift() == 0
 4033 operand indirectNarrow(rRegN reg)
 4034 %{
 4035   predicate(CompressedOops::shift() == 0);
 4036   constraint(ALLOC_IN_RC(ptr_reg));
 4037   match(DecodeN reg);
 4038 
 4039   format %{ "[$reg]" %}
 4040   interface(MEMORY_INTER) %{
 4041     base($reg);
 4042     index(0x4);  // No Index
 4043     scale(0x0);  // No Scale
 4044     disp(0x0);   // No Displacement
 4045   %}
 4046 %}
 4047 
 4048 // Indirect Memory Plus Short Offset Operand on a narrow oop: [reg + off]; only when CompressedOops::shift() == 0
 4049 operand indOffset8Narrow(rRegN reg, immL8 off)
 4050 %{
 4051   predicate(CompressedOops::shift() == 0);
 4052   constraint(ALLOC_IN_RC(ptr_reg));
 4053   match(AddP (DecodeN reg) off);
 4054 
 4055   format %{ "[$reg + $off (8-bit)]" %}
 4056   interface(MEMORY_INTER) %{
 4057     base($reg);
 4058     index(0x4);  // No Index
 4059     scale(0x0);  // No Scale
 4060     disp($off);
 4061   %}
 4062 %}
 4063 
 4064 // Indirect Memory Plus Long Offset Operand on a narrow oop: [reg + off]; only when CompressedOops::shift() == 0
 4065 operand indOffset32Narrow(rRegN reg, immL32 off)
 4066 %{
 4067   predicate(CompressedOops::shift() == 0);
 4068   constraint(ALLOC_IN_RC(ptr_reg));
 4069   match(AddP (DecodeN reg) off);
 4070 
 4071   format %{ "[$reg + $off (32-bit)]" %}
 4072   interface(MEMORY_INTER) %{
 4073     base($reg);
 4074     index(0x4);  // No Index
 4075     scale(0x0);  // No Scale
 4076     disp($off);
 4077   %}
 4078 %}
 4079 
 4080 // Indirect Memory Plus Index Register Plus Offset Operand on a narrow oop: [reg + lreg + off]
 4081 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 4082 %{
 4083   predicate(CompressedOops::shift() == 0);
 4084   constraint(ALLOC_IN_RC(ptr_reg));
 4085   match(AddP (AddP (DecodeN reg) lreg) off);
 4086 
 4087   op_cost(10);
 4088   format %{"[$reg + $off + $lreg]" %}
 4089   interface(MEMORY_INTER) %{
 4090     base($reg);
 4091     index($lreg);
 4092     scale(0x0);  // No Scale
 4093     disp($off);
 4094   %}
 4095 %}
 4096 
 4097 // Indirect Memory Plus Index Register Operand on a narrow oop: [reg + lreg] (no offset)
 4098 operand indIndexNarrow(rRegN reg, rRegL lreg)
 4099 %{
 4100   predicate(CompressedOops::shift() == 0);
 4101   constraint(ALLOC_IN_RC(ptr_reg));
 4102   match(AddP (DecodeN reg) lreg);
 4103 
 4104   op_cost(10);
 4105   format %{"[$reg + $lreg]" %}
 4106   interface(MEMORY_INTER) %{
 4107     base($reg);
 4108     index($lreg);
 4109     scale(0x0);  // No Scale
 4110     disp(0x0);   // No Displacement
 4111   %}
 4112 %}
 4113 
 4114 // Indirect Memory Plus Scaled Index Register Operand on a narrow oop: [reg + lreg << scale]
 4115 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 4116 %{
 4117   predicate(CompressedOops::shift() == 0);
 4118   constraint(ALLOC_IN_RC(ptr_reg));
 4119   match(AddP (DecodeN reg) (LShiftL lreg scale));
 4120 
 4121   op_cost(10);
 4122   format %{"[$reg + $lreg << $scale]" %}
 4123   interface(MEMORY_INTER) %{
 4124     base($reg);
 4125     index($lreg);
 4126     scale($scale);
 4127     disp(0x0);   // No Displacement
 4128   %}
 4129 %}
 4130 
 4131 // Indirect Memory Plus Scaled Index Register Plus Offset Operand on a narrow oop: [reg + lreg << scale + off]
 4132 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 4133 %{
 4134   predicate(CompressedOops::shift() == 0);
 4135   constraint(ALLOC_IN_RC(ptr_reg));
 4136   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 4137 
 4138   op_cost(10);
 4139   format %{"[$reg + $off + $lreg << $scale]" %}
 4140   interface(MEMORY_INTER) %{
 4141     base($reg);
 4142     index($lreg);
 4143     scale($scale);
 4144     disp($off);
 4145   %}
 4146 %}
 4147 
 4148 // Indirect Memory Plus Positive Index Register Plus Offset Operand on a narrow oop: [reg + idx + off]
 4149 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 4150 %{
 4151   constraint(ALLOC_IN_RC(ptr_reg));
 4152   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 4153   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);  // predicate above: shift is 0 and the long type's lower bound is >= 0
 4154 
 4155   op_cost(10);
 4156   format %{"[$reg + $off + $idx]" %}
 4157   interface(MEMORY_INTER) %{
 4158     base($reg);
 4159     index($idx);
 4160     scale(0x0);  // No Scale
 4161     disp($off);
 4162   %}
 4163 %}
 4164 
 4165 // Indirect Memory Plus Scaled Positive Index Register Plus Offset Operand on a narrow oop: [reg + idx << scale + off]
 4166 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 4167 %{
 4168   constraint(ALLOC_IN_RC(ptr_reg));
 4169   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 4170   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);  // predicate above: shift is 0 and the long type's lower bound is >= 0
 4171 
 4172   op_cost(10);
 4173   format %{"[$reg + $off + $idx << $scale]" %}
 4174   interface(MEMORY_INTER) %{
 4175     base($reg);
 4176     index($idx);
 4177     scale($scale);
 4178     disp($off);
 4179   %}
 4180 %}
 4181 
 4182 //----------Special Memory Operands--------------------------------------------
 4183 // Stack Slot Operand - This operand is used for loading and storing temporary
 4184 //                      values on the stack where a match requires a value to
 4185 //                      flow through memory.
 4186 operand stackSlotP(sRegP reg)  // pointer stack slot: RSP-based address whose displacement is the slot
 4187 %{
 4188   constraint(ALLOC_IN_RC(stack_slots));
 4189   // No match rule because this operand is only generated in matching
 4190 
 4191   format %{ "[$reg]" %}
 4192   interface(MEMORY_INTER) %{
 4193     base(0x4);   // RSP
 4194     index(0x4);  // No Index
 4195     scale(0x0);  // No Scale
 4196     disp($reg);  // Stack Offset
 4197   %}
 4198 %}
 4199 
 4200 operand stackSlotI(sRegI reg)  // int stack slot: RSP-based address whose displacement is the slot
 4201 %{
 4202   constraint(ALLOC_IN_RC(stack_slots));
 4203   // No match rule because this operand is only generated in matching
 4204 
 4205   format %{ "[$reg]" %}
 4206   interface(MEMORY_INTER) %{
 4207     base(0x4);   // RSP
 4208     index(0x4);  // No Index
 4209     scale(0x0);  // No Scale
 4210     disp($reg);  // Stack Offset
 4211   %}
 4212 %}
 4213 
 4214 operand stackSlotF(sRegF reg)  // float stack slot: RSP-based address whose displacement is the slot
 4215 %{
 4216   constraint(ALLOC_IN_RC(stack_slots));
 4217   // No match rule because this operand is only generated in matching
 4218 
 4219   format %{ "[$reg]" %}
 4220   interface(MEMORY_INTER) %{
 4221     base(0x4);   // RSP
 4222     index(0x4);  // No Index
 4223     scale(0x0);  // No Scale
 4224     disp($reg);  // Stack Offset
 4225   %}
 4226 %}
 4227 
 4228 operand stackSlotD(sRegD reg)  // double stack slot: RSP-based address whose displacement is the slot
 4229 %{
 4230   constraint(ALLOC_IN_RC(stack_slots));
 4231   // No match rule because this operand is only generated in matching
 4232 
 4233   format %{ "[$reg]" %}
 4234   interface(MEMORY_INTER) %{
 4235     base(0x4);   // RSP
 4236     index(0x4);  // No Index
 4237     scale(0x0);  // No Scale
 4238     disp($reg);  // Stack Offset
 4239   %}
 4240 %}
 4241 operand stackSlotL(sRegL reg)  // long stack slot: RSP-based address whose displacement is the slot
 4242 %{
 4243   constraint(ALLOC_IN_RC(stack_slots));
 4244   // No match rule because this operand is only generated in matching
 4245 
 4246   format %{ "[$reg]" %}
 4247   interface(MEMORY_INTER) %{
 4248     base(0x4);   // RSP
 4249     index(0x4);  // No Index
 4250     scale(0x0);  // No Scale
 4251     disp($reg);  // Stack Offset
 4252   %}
 4253 %}
 4254 
 4255 //----------Conditional Branch Operands----------------------------------------
 4256 // Comparison Op  - This is the operation of the comparison, and is limited to
 4257 //                  the following set of codes:
 4258 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4259 //
 4260 // Other attributes of the comparison, such as unsignedness, are specified
 4261 // by the comparison instruction that sets a condition code flags register.
 4262 // That result is represented by a flags operand whose subtype is appropriate
 4263 // to the unsignedness (etc.) of the comparison.
 4264 //
 4265 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4266 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4267 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4268 
 4269 // Comparison Code: maps each Bool test to a condition encoding and its mnemonic suffix (signed forms l/ge/le/g)
 4270 operand cmpOp()
 4271 %{
 4272   match(Bool);
 4273 
 4274   format %{ "" %}
 4275   interface(COND_INTER) %{
 4276     equal(0x4, "e");
 4277     not_equal(0x5, "ne");
 4278     less(0xC, "l");
 4279     greater_equal(0xD, "ge");
 4280     less_equal(0xE, "le");
 4281     greater(0xF, "g");
 4282     overflow(0x0, "o");
 4283     no_overflow(0x1, "no");
 4284   %}
 4285 %}
 4286 
 4287 // Comparison Code, unsigned compare (forms b/ae/be/a).  Used by FP also, with
 4288 // C2 (unordered) turned into GT or LT already.  The other bits
 4289 // C0 and C3 are turned into Carry & Zero flags.
 4290 operand cmpOpU()
 4291 %{
 4292   match(Bool);
 4293 
 4294   format %{ "" %}
 4295   interface(COND_INTER) %{
 4296     equal(0x4, "e");
 4297     not_equal(0x5, "ne");
 4298     less(0x2, "b");
 4299     greater_equal(0x3, "ae");
 4300     less_equal(0x6, "be");
 4301     greater(0x7, "a");
 4302     overflow(0x0, "o");
 4303     no_overflow(0x1, "no");
 4304   %}
 4305 %}
 4306 
 4307 
 4308 // Floating comparisons that don't require any fixup for the unordered case.
 4309 // Selected for lt/ge/le/gt tests, or when both inputs of the comparison are the
 4310 // same: then ZF is always set, so we don't need to use cmpOpUCF2 for eq/ne.
 4311 operand cmpOpUCF() %{
 4312   match(Bool);
 4313   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4314             n->as_Bool()->_test._test == BoolTest::ge ||
 4315             n->as_Bool()->_test._test == BoolTest::le ||
 4316             n->as_Bool()->_test._test == BoolTest::gt ||
 4317             n->in(1)->in(1) == n->in(1)->in(2));
 4318   format %{ "" %}
 4319   interface(COND_INTER) %{
 4320     equal(0xb, "np");      // eq/ne use the parity forms here, unlike cmpOpU
 4321     not_equal(0xa, "p");
 4322     less(0x2, "b");
 4323     greater_equal(0x3, "ae");
 4324     less_equal(0x6, "be");
 4325     greater(0x7, "a");
 4326     overflow(0x0, "o");
 4327     no_overflow(0x1, "no");
 4328   %}
 4329 %}
 4330 
 4331 
 4332 // Floating comparisons that can be fixed up with extra conditional jumps: eq/ne tests whose two compare inputs differ
 4333 operand cmpOpUCF2() %{
 4334   match(Bool);
 4335   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 4336              n->as_Bool()->_test._test == BoolTest::eq) &&
 4337             n->in(1)->in(1) != n->in(1)->in(2));
 4338   format %{ "" %}
 4339   interface(COND_INTER) %{
 4340     equal(0x4, "e");
 4341     not_equal(0x5, "ne");
 4342     less(0x2, "b");
 4343     greater_equal(0x3, "ae");
 4344     less_equal(0x6, "be");
 4345     greater(0x7, "a");
 4346     overflow(0x0, "o");
 4347     no_overflow(0x1, "no");
 4348   %}
 4349 %}
 4350 
 4351 //----------OPERAND CLASSES----------------------------------------------------
 4352 // Operand Classes are groups of operands that are used to simplify
 4353 // instruction definitions by not requiring the AD writer to specify separate
 4354 // instructions for every form of operand when the instruction accepts
 4355 // multiple operand types with the same basic encoding and format.  The classic
 4356 // case of this is memory operands.
 4357 
 4358 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,  // plain pointer-based forms
 4359                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 4360                indCompressedOop, indCompressedOopOffset,  // R12-based compressed oop forms
 4361                indirectNarrow, indOffset8Narrow, indOffset32Narrow,  // narrow-oop forms (DecodeN base)
 4362                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 4363                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4364 
 4365 //----------PIPELINE-----------------------------------------------------------
 4366 // Rules which define the behavior of the target architecture's pipeline.
 4367 pipeline %{
 4368 
 4369 //----------ATTRIBUTES---------------------------------------------------------
 4370 attributes %{
 4371   variable_size_instructions;        // Variable size instructions
 4372   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4373   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 4374   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4375   instruction_fetch_units = 1;       // of 16 bytes
 4376 
 4377   // List of nop instructions
 4378   nops( MachNop );
 4379 %}
 4380 
 4381 //----------RESOURCES----------------------------------------------------------
 4382 // Resources are the functional units available to the machine
 4383 
 4384 // Generic P2/P3 pipeline
 4385 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4386 // 3 instructions decoded per cycle.
 4387 // 2 load/store ops per cycle (NOTE(review): three MS units are declared below - confirm), 1 branch, 1 FPU,
 4388 // 3 ALU op, only ALU0 handles mul instructions.
 4389 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4390            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 4391            BR, FPU,
 4392            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 4393 
 4394 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4395 // Pipeline Description specifies the stages in the machine's pipeline
 4396 
 4397 // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe classes below
 4398 pipe_desc(S0, S1, S2, S3, S4, S5);
 4399 
 4400 //----------PIPELINE CLASSES---------------------------------------------------
 4401 // Pipeline Classes describe the stages in which input and output are
 4402 // referenced by the hardware pipeline.
 4403 
 4404 // Naming convention: ialu or fpu
 4405 // Then: _reg
 4406 // Then: _reg if there is a 2nd register
 4407 // Then: _long if it's a pair of instructions implementing a long
 4408 // Then: _fat if it requires the big decoder
 4409 //   Or: _mem if it requires the big decoder and a memory unit.
 4410 
 4411 // Integer ALU reg operation: single instruction, dst is both read and written
 4412 pipe_class ialu_reg(rRegI dst)
 4413 %{
 4414     single_instruction;
 4415     dst    : S4(write); // result written in stage S4
 4416     dst    : S3(read);  // operand read in stage S3
 4417     DECODE : S0;        // any decoder
 4418     ALU    : S3;        // any alu
 4419 %}
 4420 
 4421 // Long ALU reg operation: counted as two instructions, dst is both read and written
 4422 pipe_class ialu_reg_long(rRegL dst)
 4423 %{
 4424     instruction_count(2);
 4425     dst    : S4(write); // result written in stage S4
 4426     dst    : S3(read);  // operand read in stage S3
 4427     DECODE : S0(2);     // any 2 decoders
 4428     ALU    : S3(2);     // any 2 alus (ALU has three members: ALU0 | ALU1 | ALU2)
 4429 %}
 4430 
 4431 // Integer ALU reg operation using big decoder (D0 instead of any DECODE unit)
 4432 pipe_class ialu_reg_fat(rRegI dst)
 4433 %{
 4434     single_instruction;
 4435     dst    : S4(write); // result written in stage S4
 4436     dst    : S3(read);  // operand read in stage S3
 4437     D0     : S0;        // big decoder only
 4438     ALU    : S3;        // any alu
 4439 %}
 4440 
 4441 // Integer ALU reg-reg operation: single instruction, src read in S3, dst written in S4
 4442 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 4443 %{
 4444     single_instruction;
 4445     dst    : S4(write);
 4446     src    : S3(read);
 4447     DECODE : S0;        // any decoder
 4448     ALU    : S3;        // any alu
 4449 %}
 4450 
 4451 // Integer ALU reg-reg operation using big decoder (NOTE(review): src is declared as a memory operand, no MEM unit is used)
 4452 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 4453 %{
 4454     single_instruction;
 4455     dst    : S4(write);
 4456     src    : S3(read);
 4457     D0     : S0;        // big decoder only
 4458     ALU    : S3;        // any alu
 4459 %}
 4460 
 4461 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, dst written in S5
 4462 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 4463 %{
 4464     single_instruction;
 4465     dst    : S5(write);
 4466     mem    : S3(read);
 4467     D0     : S0;        // big decoder only
 4468     ALU    : S4;        // any alu
 4469     MEM    : S3;        // any mem
 4470 %}
 4471 
 4472 // Integer mem operation (prefetch): only reads the memory operand, no ALU unit and no register result
 4473 pipe_class ialu_mem(memory mem)
 4474 %{
 4475     single_instruction;
 4476     mem    : S3(read);
 4477     D0     : S0;        // big decoder only
 4478     MEM    : S3;        // any mem
 4479 %}
 4480 
 4481 // Integer Store to Memory from a register: mem operand read in S3, src read in S5
 4482 pipe_class ialu_mem_reg(memory mem, rRegI src)
 4483 %{
 4484     single_instruction;
 4485     mem    : S3(read);
 4486     src    : S5(read);
 4487     D0     : S0;        // big decoder only
 4488     ALU    : S4;        // any alu
 4489     MEM    : S3;        // any mem
 4490 %}
 4491 
 4492 // // Long Store to Memory
 4493 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 4494 // %{
 4495 //     instruction_count(2);
 4496 //     mem    : S3(read);
 4497 //     src    : S5(read);
 4498 //     D0     : S0(2);          // big decoder only; twice
 4499 //     ALU    : S4(2);     // any 2 alus
 4500 //     MEM    : S3(2);  // Both mems
 4501 // %}
 4502 
 4503 // Integer Store to Memory with no register source (presumably the immediate form, per the _imm suffix)
 4504 pipe_class ialu_mem_imm(memory mem)
 4505 %{
 4506     single_instruction;
 4507     mem    : S3(read);
 4508     D0     : S0;        // big decoder only
 4509     ALU    : S4;        // any alu
 4510     MEM    : S3;        // any mem
 4511 %}
 4512 
 4513 // Integer ALU0 reg-reg operation: like ialu_reg_reg but tied to the big decoder and to ALU0
 4514 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 4515 %{
 4516     single_instruction;
 4517     dst    : S4(write);
 4518     src    : S3(read);
 4519     D0     : S0;        // Big decoder only
 4520     ALU0   : S3;        // only alu0
 4521 %}
 4522 
 4523 // Integer ALU0 reg-mem operation: like ialu_reg_mem but tied to ALU0
 4524 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 4525 %{
 4526     single_instruction;
 4527     dst    : S5(write);
 4528     mem    : S3(read);
 4529     D0     : S0;        // big decoder only
 4530     ALU0   : S4;        // ALU0 only
 4531     MEM    : S3;        // any mem
 4532 %}
 4533 
 4534 // Integer ALU reg-reg operation that writes the flags register cr (both sources read in S3)
 4535 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 4536 %{
 4537     single_instruction;
 4538     cr     : S4(write);
 4539     src1   : S3(read);
 4540     src2   : S3(read);
 4541     DECODE : S0;        // any decoder
 4542     ALU    : S3;        // any alu
 4543 %}
 4544 
 4545 // Integer ALU reg-imm operation that writes the flags register cr (only src1 is a pipeline operand)
 4546 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 4547 %{
 4548     single_instruction;
 4549     cr     : S4(write);
 4550     src1   : S3(read);
 4551     DECODE : S0;        // any decoder
 4552     ALU    : S3;        // any alu
 4553 %}
 4554 
 4555 // Integer ALU reg-mem operation that writes the flags register cr (src2 is a memory operand)
 4556 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 4557 %{
 4558     single_instruction;
 4559     cr     : S4(write);
 4560     src1   : S3(read);
 4561     src2   : S3(read);
 4562     D0     : S0;        // big decoder only
 4563     ALU    : S4;        // any alu
 4564     MEM    : S3;        // any mem
 4565 %}
 4566 
 4567 // Four-instruction sequence reading p, q and y (NOTE(review): header used to say "Conditional move reg-reg"; name suggests a compare-less-than idiom - confirm)
 4568 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 4569 %{
 4570     instruction_count(4);
 4571     y      : S4(read);
 4572     q      : S3(read);
 4573     p      : S3(read);
 4574     DECODE : S0(4);     // any decoder
 4575 %}
 4576 
 4577 // Conditional move reg-reg: reads src and the flags cr in S3, writes dst in S4; no ALU unit is reserved
 4578 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 4579 %{
 4580     single_instruction;
 4581     dst    : S4(write);
 4582     src    : S3(read);
 4583     cr     : S3(read);
 4584     DECODE : S0;        // any decoder
 4585 %}
 4586 
 4587 // Conditional move reg-mem: like pipe_cmov_reg but src is a memory operand and a MEM unit is used in S3
 4588 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 4589 %{
 4590     single_instruction;
 4591     dst    : S4(write);
 4592     src    : S3(read);
 4593     cr     : S3(read);
 4594     DECODE : S0;        // any decoder
 4595     MEM    : S3;        // any mem
 4596 %}
 4597 
 4598 // Conditional move reg-reg long (NOTE(review): declared single_instruction yet requests DECODE with count 2 - confirm)
 4599 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 4600 %{
 4601     single_instruction;
 4602     dst    : S4(write);
 4603     src    : S3(read);
 4604     cr     : S3(read);
 4605     DECODE : S0(2);     // any 2 decoders
 4606 %}
 4607 
 4608 // XXX
 4609 // // Conditional move double reg-reg
 4610 // pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
 4611 // %{
 4612 //     single_instruction;
 4613 //     dst    : S4(write);
 4614 //     src    : S3(read);
 4615 //     cr     : S3(read);
 4616 //     DECODE : S0;     // any decoder
 4617 // %}
 4618 
 4619 // Float reg operation (single register operand, read in S3); counted as two instructions
 4620 pipe_class fpu_reg(regD dst)
 4621 %{
 4622     instruction_count(2);
 4623     dst    : S3(read);
 4624     DECODE : S0(2);     // any 2 decoders
 4625     FPU    : S3;        // the single FPU unit
 4626 %}
 4627 
 4628 // Float reg-reg operation: src read in S3, dst written in S4; counted as two instructions
 4629 pipe_class fpu_reg_reg(regD dst, regD src)
 4630 %{
 4631     instruction_count(2);
 4632     dst    : S4(write);
 4633     src    : S3(read);
 4634     DECODE : S0(2);     // any 2 decoders
 4635     FPU    : S3;        // the single FPU unit
 4636 %}
 4637 
 4638 // Float reg-reg-reg operation: two sources read in S3, dst written in S4; counted as three instructions
 4639 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 4640 %{
 4641     instruction_count(3);
 4642     dst    : S4(write);
 4643     src1   : S3(read);
 4644     src2   : S3(read);
 4645     DECODE : S0(3);     // any 3 decoders
 4646     FPU    : S3(2);     // FPU with count 2
 4647 %}
 4648 
 4649 // Float reg-reg-reg-reg operation: three sources read in S3, dst written in S4; counted as four instructions
 4650 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 4651 %{
 4652     instruction_count(4);
 4653     dst    : S4(write);
 4654     src1   : S3(read);
 4655     src2   : S3(read);
 4656     src3   : S3(read);
 4657     DECODE : S0(4);     // decoder count 4 (NOTE(review): comment used to say "any 3 decoders"; only D0-D2 exist - confirm)
 4658     FPU    : S3(2);     // FPU with count 2
 4659 %}
 4660 
 4661 // Float reg-mem-reg-reg operation: src1 is a memory operand; counted as four instructions
 4662 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 4663 %{
 4664     instruction_count(4);
 4665     dst    : S4(write);
 4666     src1   : S3(read);
 4667     src2   : S3(read);
 4668     src3   : S3(read);
 4669     DECODE : S1(3);     // any 3 decoders
 4670     D0     : S0;        // Big decoder only
 4671     FPU    : S3(2);     // FPU with count 2
 4672     MEM    : S3;        // any mem
 4673 %}
 4674 
 4675 // Float reg-mem operation: memory read in S3, FPU in S4, dst written in S5; counted as two instructions
 4676 pipe_class fpu_reg_mem(regD dst, memory mem)
 4677 %{
 4678     instruction_count(2);
 4679     dst    : S5(write);
 4680     mem    : S3(read);
 4681     D0     : S0;        // big decoder only
 4682     DECODE : S1;        // any decoder for FPU POP
 4683     FPU    : S4;
 4684     MEM    : S3;        // any mem
 4685 %}
 4686 
 4687 // Float reg-reg-mem operation: src1 and mem read in S3, dst written in S5; counted as three instructions
 4688 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 4689 %{
 4690     instruction_count(3);
 4691     dst    : S5(write);
 4692     src1   : S3(read);
 4693     mem    : S3(read);
 4694     D0     : S0;        // big decoder only
 4695     DECODE : S1(2);     // any decoder for FPU POP
 4696     FPU    : S4;
 4697     MEM    : S3;        // any mem
 4698 %}
 4699 
 4700 // Float mem-reg operation: mem operand read in S3, src read in S5; counted as two instructions
 4701 pipe_class fpu_mem_reg(memory mem, regD src)
 4702 %{
 4703     instruction_count(2);
 4704     src    : S5(read);
 4705     mem    : S3(read);
 4706     DECODE : S0;        // any decoder for FPU PUSH
 4707     D0     : S1;        // big decoder only
 4708     FPU    : S4;
 4709     MEM    : S3;        // any mem
 4710 %}
 4711 
 4712 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
      // Float mem-reg-reg operation: a memory operand and two register sources,
      // all declared as reads at S3. Three instructions: two on any decoder at
      // S0 and one on D0 at S1.
 4713 %{
 4714     instruction_count(3);
 4715     src1   : S3(read);
 4716     src2   : S3(read);
 4717     mem    : S3(read);
 4718     DECODE : S0(2);     // any decoder for FPU PUSH
 4719     D0     : S1;        // big decoder only
 4720     FPU    : S4;
 4721     MEM    : S3;        // any mem
 4722 %}
 4723 
 4724 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
      // Float mem-reg-mem operation: two memory operands (mem, src2) and one
      // register source. src1 and src2 are read at S3, mem at S4; MEM is
      // reserved with a count of 2, one per memory operand.
 4725 %{
 4726     instruction_count(3);
 4727     src1   : S3(read);
 4728     src2   : S3(read);
 4729     mem    : S4(read);
 4730     DECODE : S0;        // any decoder for FPU PUSH
 4731     D0     : S0(2);     // big decoder only
 4732     FPU    : S4;
 4733     MEM    : S3(2);     // any mem
 4734 %}
 4735 
 4736 pipe_class fpu_mem_mem(memory dst, memory src1)
      // Float mem-mem operation: both operands are memory. Note that dst is
      // declared with a read (S4), not a write, and that no FPU resource is
      // reserved; only D0 and MEM are, each with a count of 2.
 4737 %{
 4738     instruction_count(2);
 4739     src1   : S3(read);
 4740     dst    : S4(read);
 4741     D0     : S0(2);     // big decoder only
 4742     MEM    : S3(2);     // any mem
 4743 %}
 4744 
 4745 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
      // Float mem-mem-mem operation: all three operands are memory. As in the
      // two-operand mem-mem class, dst is declared with a read (S4). D0 and MEM
      // are each reserved with a count of 3, plus FPU at S4.
 4746 %{
 4747     instruction_count(3);
 4748     src1   : S3(read);
 4749     src2   : S3(read);
 4750     dst    : S4(read);
 4751     D0     : S0(3);     // big decoder only
 4752     FPU    : S4;
 4753     MEM    : S3(3);     // any mem
 4754 %}
 4755 
 4756 pipe_class fpu_mem_reg_con(memory mem, regD src1)
      // Float mem-reg-constant operation. Only mem and src1 are pipeline
      // operands (both read at S4); the constant named in the class name is not
      // listed as an operand. MEM is reserved with a count of 2.
 4757 %{
 4758     instruction_count(3);
 4759     src1   : S4(read);
 4760     mem    : S4(read);
 4761     DECODE : S0;        // any decoder for FPU PUSH
 4762     D0     : S0(2);     // big decoder only
 4763     FPU    : S4;
 4764     MEM    : S3(2);     // any mem
 4765 %}
 4766 
 4767 // Float load constant into a register.
      // dst (written at S5) is the only pipeline operand; the constant is not
      // listed. D0 and MEM are reserved for the load of the constant.
 4768 pipe_class fpu_reg_con(regD dst)
 4769 %{
 4770     instruction_count(2);
 4771     dst    : S5(write);
 4772     D0     : S0;        // big decoder only for the load
 4773     DECODE : S1;        // any decoder for FPU POP
 4774     FPU    : S4;
 4775     MEM    : S3;        // any mem
 4776 %}
 4777 
 4778 // Float reg-reg-constant operation: a constant load combined with a
      // register source. dst is written at S5 and src read at S3; the constant
      // itself is not a pipeline operand.
 4779 pipe_class fpu_reg_reg_con(regD dst, regD src)
 4780 %{
 4781     instruction_count(3);
 4782     dst    : S5(write);
 4783     src    : S3(read);
 4784     D0     : S0;        // big decoder only for the load
 4785     DECODE : S1(2);     // any decoder for FPU POP
 4786     FPU    : S4;
 4787     MEM    : S3;        // any mem
 4788 %}
 4789 
 4790 // Unconditional branch.
      // A single instruction that reserves only the BR resource at S3; the
      // label operand has no read/write stage declared.
 4791 pipe_class pipe_jmp(label labl)
 4792 %{
 4793     single_instruction;
 4794     BR   : S3;
 4795 %}
 4796 
 4797 // Conditional branch.
      // A single instruction that reads the flags register operand cr at S1
      // and reserves BR at S3; cmp and labl have no stage declared.
 4798 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 4799 %{
 4800     single_instruction;
 4801     cr    : S1(read);
 4802     BR    : S3;
 4803 %}
 4804 
 4805 // Allocation idiom (per the class name, a cmpxchg-based sequence).
      // Declared as one serializing instruction with a fixed latency of 6.
      // heap_ptr is read at S3 and dst written at S5; reserves DECODE, D0, MEM,
      // ALU and BR.
 4806 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 4807 %{
 4808     instruction_count(1); force_serialization;
 4809     fixed_latency(6);
 4810     heap_ptr : S3(read);
 4811     DECODE   : S0(3);
 4812     D0       : S2;
 4813     MEM      : S3;
 4814     ALU      : S3(2);
 4815     dst      : S5(write);
 4816     BR       : S5;
 4817 %}
 4818 
 4819 // Generic big/slow expanded idiom.
      // Catch-all class with no operands: 10 instructions spanning multiple
      // bundles, serializing, with a fixed latency of 100. Many instructions
      // in this file use it where no more specific class fits (several of
      // those uses are marked XXX).
 4820 pipe_class pipe_slow()
 4821 %{
 4822     instruction_count(10); multiple_bundles; force_serialization;
 4823     fixed_latency(100);
 4824     D0  : S0(2);
 4825     MEM : S3(2);
 4826 %}
 4827 
 4828 // The real do-nothing guy: zero instructions, no operands, no resources.
      // Used as the pipeline class of the Nop node by the define block that
      // follows.
 4829 pipe_class empty()
 4830 %{
 4831     instruction_count(0);
 4832 %}
 4833 
 4834 // Define the class for the Nop node: MachNop uses the zero-instruction
      // pipe_class named "empty".
 4835 define
 4836 %{
 4837    MachNop = empty;
 4838 %}
 4839 
 4840 %}
 4841 
 4842 //----------INSTRUCTIONS-------------------------------------------------------
 4843 //
 4844 // match      -- States which machine-independent subtree may be replaced
 4845 //               by this instruction.
 4846 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4847 //               selection to identify a minimum cost tree of machine
 4848 //               instructions that matches a tree of machine-independent
 4849 //               instructions.
 4850 // format     -- A string providing the disassembly for this instruction.
 4851 //               The value of an instruction's operand may be inserted
 4852 //               by referring to it with a '$' prefix.
 4853 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4854 //               to within an encode class as $primary, $secondary, and $tertiary
 4855 //               respectively.  The primary opcode is commonly used to
 4856 //               indicate the type of machine instruction, while secondary
 4857 //               and tertiary are often used for prefix options or addressing
 4858 //               modes.
 4859 // ins_encode -- A list of encode classes with parameters. The encode class
 4860 //               name must have been defined in an 'enc_class' specification
 4861 //               in the encode section of the architecture description.
 4862 
 4863 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 4864 // Dummy float move: regF -> vlRegF (register-class change only).
      // Matches a bare (Set dst src). The encoding is ShouldNotReachHere()
      // because, per the group note above, these moves are removed during
      // post-selection cleanup and are not expected to be emitted.
 4865 instruct MoveF2VL(vlRegF dst, regF src) %{
 4866   match(Set dst src);
 4867   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4868   ins_encode %{
 4869     ShouldNotReachHere();
 4870   %}
 4871   ins_pipe( fpu_reg_reg );
 4872 %}
 4873 
 4874 // Dummy float move: regF -> legRegF (register-class change only).
      // Matches a bare (Set dst src); the encoding is ShouldNotReachHere(), so
      // the node must be gone before code emission (see the "Dummy reg-to-reg
      // vector moves" note heading this group).
 4875 instruct MoveF2LEG(legRegF dst, regF src) %{
 4876   match(Set dst src);
 4877   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4878   ins_encode %{
 4879     ShouldNotReachHere();
 4880   %}
 4881   ins_pipe( fpu_reg_reg );
 4882 %}
 4883 
 4884 // Dummy float move: vlRegF -> regF (register-class change only).
      // Matches a bare (Set dst src); the encoding is ShouldNotReachHere(), so
      // the node must be gone before code emission (see the "Dummy reg-to-reg
      // vector moves" note heading this group).
 4885 instruct MoveVL2F(regF dst, vlRegF src) %{
 4886   match(Set dst src);
 4887   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4888   ins_encode %{
 4889     ShouldNotReachHere();
 4890   %}
 4891   ins_pipe( fpu_reg_reg );
 4892 %}
 4893 
 4894 // Dummy float move: legRegF -> regF (register-class change only).
      // Matches a bare (Set dst src); the encoding is ShouldNotReachHere(), so
      // the node must be gone before code emission (see the "Dummy reg-to-reg
      // vector moves" note heading this group).
 4895 instruct MoveLEG2F(regF dst, legRegF src) %{
 4896   match(Set dst src);
 4897   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4898   ins_encode %{
 4899     ShouldNotReachHere();
 4900   %}
 4901   ins_pipe( fpu_reg_reg );
 4902 %}
 4903 
 4904 // Dummy double move: regD -> vlRegD (register-class change only).
      // Matches a bare (Set dst src); the encoding is ShouldNotReachHere(), so
      // the node must be gone before code emission (see the "Dummy reg-to-reg
      // vector moves" note heading this group).
 4905 instruct MoveD2VL(vlRegD dst, regD src) %{
 4906   match(Set dst src);
 4907   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4908   ins_encode %{
 4909     ShouldNotReachHere();
 4910   %}
 4911   ins_pipe( fpu_reg_reg );
 4912 %}
 4913 
 4914 // Dummy double move: regD -> legRegD (register-class change only).
      // Matches a bare (Set dst src); the encoding is ShouldNotReachHere(), so
      // the node must be gone before code emission (see the "Dummy reg-to-reg
      // vector moves" note heading this group).
 4915 instruct MoveD2LEG(legRegD dst, regD src) %{
 4916   match(Set dst src);
 4917   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4918   ins_encode %{
 4919     ShouldNotReachHere();
 4920   %}
 4921   ins_pipe( fpu_reg_reg );
 4922 %}
 4923 
 4924 // Dummy double move: vlRegD -> regD (register-class change only).
      // Matches a bare (Set dst src); the encoding is ShouldNotReachHere(), so
      // the node must be gone before code emission (see the "Dummy reg-to-reg
      // vector moves" note heading this group).
 4925 instruct MoveVL2D(regD dst, vlRegD src) %{
 4926   match(Set dst src);
 4927   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4928   ins_encode %{
 4929     ShouldNotReachHere();
 4930   %}
 4931   ins_pipe( fpu_reg_reg );
 4932 %}
 4933 
 4934 // Dummy double move: legRegD -> regD (register-class change only).
      // Matches a bare (Set dst src); the encoding is ShouldNotReachHere(), so
      // the node must be gone before code emission (see the "Dummy reg-to-reg
      // vector moves" note heading this group).
 4935 instruct MoveLEG2D(regD dst, legRegD src) %{
 4936   match(Set dst src);
 4937   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4938   ins_encode %{
 4939     ShouldNotReachHere();
 4940   %}
 4941   ins_pipe( fpu_reg_reg );
 4942 %}
 4943 
 4944 //----------Load/Store/Move Instructions---------------------------------------
 4945 //----------Load Instructions--------------------------------------------------
 4946 
 4947 // Load Byte (8 bit signed)
      // Matches (LoadB mem) and emits one sign-extending movsbl from $mem into
      // the rRegI destination.
 4948 instruct loadB(rRegI dst, memory mem)
 4949 %{
 4950   match(Set dst (LoadB mem));
 4951 
 4952   ins_cost(125);
 4953   format %{ "movsbl  $dst, $mem\t# byte" %}
 4954 
 4955   ins_encode %{
 4956     __ movsbl($dst$$Register, $mem$$Address);
 4957   %}
 4958 
 4959   ins_pipe(ialu_reg_mem);
 4960 %}
 4961 
 4962 // Load Byte (8 bit signed) into Long Register
      // Folds the ConvI2L into the load: (ConvI2L (LoadB mem)) becomes a single
      // movsbq, so no separate widening instruction is needed.
 4963 instruct loadB2L(rRegL dst, memory mem)
 4964 %{
 4965   match(Set dst (ConvI2L (LoadB mem)));
 4966 
 4967   ins_cost(125);
 4968   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 4969 
 4970   ins_encode %{
 4971     __ movsbq($dst$$Register, $mem$$Address);
 4972   %}
 4973 
 4974   ins_pipe(ialu_reg_mem);
 4975 %}
 4976 
 4977 // Load Unsigned Byte (8 bit UNsigned)
      // Matches (LoadUB mem) and emits one zero-extending movzbl from $mem into
      // the rRegI destination.
 4978 instruct loadUB(rRegI dst, memory mem)
 4979 %{
 4980   match(Set dst (LoadUB mem));
 4981 
 4982   ins_cost(125);
 4983   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 4984 
 4985   ins_encode %{
 4986     __ movzbl($dst$$Register, $mem$$Address);
 4987   %}
 4988 
 4989   ins_pipe(ialu_reg_mem);
 4990 %}
 4991 
 4992 // Load Unsigned Byte (8 bit UNsigned) into Long Register
      // Folds the ConvI2L into the load: (ConvI2L (LoadUB mem)) becomes a
      // single movzbq.
 4993 instruct loadUB2L(rRegL dst, memory mem)
 4994 %{
 4995   match(Set dst (ConvI2L (LoadUB mem)));
 4996 
 4997   ins_cost(125);
 4998   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 4999 
 5000   ins_encode %{
 5001     __ movzbq($dst$$Register, $mem$$Address);
 5002   %}
 5003 
 5004   ins_pipe(ialu_reg_mem);
 5005 %}
 5006 
 5007 // Unsigned byte load, ANDed with an arbitrary int mask, widened to long.
      // Emitted as a zero-extending byte load followed by andl. The mask
      // constant is first combined with right_n_bits(8) (presumably keeping
      // only its low 8 bits, which is all an unsigned byte can intersect with).
      // The flags register is declared killed because of the andl.
 5008 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 5009   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5010   effect(KILL cr);
 5011 
 5012   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 5013             "andl    $dst, right_n_bits($mask, 8)" %}
 5014   ins_encode %{
 5015     Register dst_reg = $dst$$Register;
 5016     __ movzbq(dst_reg, $mem$$Address);
 5017     __ andl(dst_reg, $mask$$constant & right_n_bits(8));
 5018   %}
 5019   ins_pipe(ialu_reg_mem);
 5020 %}
 5021 
 5022 // Load Short (16 bit signed)
      // Matches (LoadS mem) and emits one sign-extending movswl from $mem into
      // the rRegI destination.
 5023 instruct loadS(rRegI dst, memory mem)
 5024 %{
 5025   match(Set dst (LoadS mem));
 5026 
 5027   ins_cost(125);
 5028   format %{ "movswl $dst, $mem\t# short" %}
 5029 
 5030   ins_encode %{
 5031     __ movswl($dst$$Register, $mem$$Address);
 5032   %}
 5033 
 5034   ins_pipe(ialu_reg_mem);
 5035 %}
 5036 
 5037 // Load Short (16 bit signed) to Byte (8 bit signed)
      // Matches a short load followed by a left-then-arithmetic-right shift
      // pair using the same immI_24 operand (presumably the constant 24, i.e.
      // an int-to-byte narrowing) and replaces all three with one byte-sized
      // movsbl from the same address.
 5038 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5039   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5040 
 5041   ins_cost(125);
 5042   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 5043   ins_encode %{
 5044     __ movsbl($dst$$Register, $mem$$Address);
 5045   %}
 5046   ins_pipe(ialu_reg_mem);
 5047 %}
 5048 
 5049 // Load Short (16 bit signed) into Long Register
      // Folds the ConvI2L into the load: (ConvI2L (LoadS mem)) becomes a single
      // movswq.
 5050 instruct loadS2L(rRegL dst, memory mem)
 5051 %{
 5052   match(Set dst (ConvI2L (LoadS mem)));
 5053 
 5054   ins_cost(125);
 5055   format %{ "movswq $dst, $mem\t# short -> long" %}
 5056 
 5057   ins_encode %{
 5058     __ movswq($dst$$Register, $mem$$Address);
 5059   %}
 5060 
 5061   ins_pipe(ialu_reg_mem);
 5062 %}
 5063 
 5064 // Load Unsigned Short/Char (16 bit UNsigned)
      // Matches (LoadUS mem) and emits one zero-extending movzwl from $mem into
      // the rRegI destination.
 5065 instruct loadUS(rRegI dst, memory mem)
 5066 %{
 5067   match(Set dst (LoadUS mem));
 5068 
 5069   ins_cost(125);
 5070   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 5071 
 5072   ins_encode %{
 5073     __ movzwl($dst$$Register, $mem$$Address);
 5074   %}
 5075 
 5076   ins_pipe(ialu_reg_mem);
 5077 %}
 5078 
 5079 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
      // Matches an unsigned short load followed by a left-then-arithmetic-right
      // shift pair using the same immI_24 operand (presumably the constant 24)
      // and replaces all three with one byte-sized movsbl from the same address.
 5080 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5081   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5082 
 5083   ins_cost(125);
 5084   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 5085   ins_encode %{
 5086     __ movsbl($dst$$Register, $mem$$Address);
 5087   %}
 5088   ins_pipe(ialu_reg_mem);
 5089 %}
 5090 
 5091 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
      // Folds the ConvI2L into the load: (ConvI2L (LoadUS mem)) becomes a
      // single movzwq.
 5092 instruct loadUS2L(rRegL dst, memory mem)
 5093 %{
 5094   match(Set dst (ConvI2L (LoadUS mem)));
 5095 
 5096   ins_cost(125);
 5097   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 5098 
 5099   ins_encode %{
 5100     __ movzwq($dst$$Register, $mem$$Address);
 5101   %}
 5102 
 5103   ins_pipe(ialu_reg_mem);
 5104 %}
 5105 
 5106 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
      // The load, the AND with the immI_255 mask and the ConvI2L collapse into
      // one zero-extending byte load (movzbq) from the same address. No
      // ins_cost is given, unlike the plain load rules nearby.
 5107 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 5108   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5109 
 5110   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 5111   ins_encode %{
 5112     __ movzbq($dst$$Register, $mem$$Address);
 5113   %}
 5114   ins_pipe(ialu_reg_mem);
 5115 %}
 5116 
 5117 // Unsigned short/char load, ANDed with an arbitrary int mask, widened to long.
      // Emitted as a zero-extending 16-bit load followed by andl. The mask
      // constant is first combined with right_n_bits(16) (presumably keeping
      // only its low 16 bits, which is all an unsigned short can intersect
      // with). The flags register is declared killed because of the andl.
 5118 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 5119   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5120   effect(KILL cr);
 5121 
 5122   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5123             "andl    $dst, right_n_bits($mask, 16)" %}
 5124   ins_encode %{
 5125     Register dst_reg = $dst$$Register;
 5126     __ movzwq(dst_reg, $mem$$Address);
 5127     __ andl(dst_reg, $mask$$constant & right_n_bits(16));
 5128   %}
 5129   ins_pipe(ialu_reg_mem);
 5130 %}
 5131 
 5132 // Load Integer
      // Matches (LoadI mem) and emits one 32-bit movl from $mem into the rRegI
      // destination.
 5133 instruct loadI(rRegI dst, memory mem)
 5134 %{
 5135   match(Set dst (LoadI mem));
 5136 
 5137   ins_cost(125);
 5138   format %{ "movl    $dst, $mem\t# int" %}
 5139 
 5140   ins_encode %{
 5141     __ movl($dst$$Register, $mem$$Address);
 5142   %}
 5143 
 5144   ins_pipe(ialu_reg_mem);
 5145 %}
 5146 
 5147 // Load Integer (32 bit signed) to Byte (8 bit signed)
      // Matches an int load followed by a left-then-arithmetic-right shift pair
      // using the same immI_24 operand (presumably the constant 24) and
      // replaces all three with one byte-sized movsbl from the same address.
 5148 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5149   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5150 
 5151   ins_cost(125);
 5152   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 5153   ins_encode %{
 5154     __ movsbl($dst$$Register, $mem$$Address);
 5155   %}
 5156   ins_pipe(ialu_reg_mem);
 5157 %}
 5158 
 5159 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
      // Matches an int load ANDed with the immI_255 mask and replaces both with
      // one zero-extending byte load (movzbl) from the same address.
 5160 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5161   match(Set dst (AndI (LoadI mem) mask));
 5162 
 5163   ins_cost(125);
 5164   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 5165   ins_encode %{
 5166     __ movzbl($dst$$Register, $mem$$Address);
 5167   %}
 5168   ins_pipe(ialu_reg_mem);
 5169 %}
 5170 
 5171 // Load Integer (32 bit signed) to Short (16 bit signed)
      // Matches an int load followed by a left-then-arithmetic-right shift pair
      // using the same immI_16 operand (presumably the constant 16) and
      // replaces all three with one 16-bit sign-extending movswl.
 5172 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5173   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5174 
 5175   ins_cost(125);
 5176   format %{ "movswl  $dst, $mem\t# int -> short" %}
 5177   ins_encode %{
 5178     __ movswl($dst$$Register, $mem$$Address);
 5179   %}
 5180   ins_pipe(ialu_reg_mem);
 5181 %}
 5182 
 5183 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
      // Matches an int load ANDed with the immI_65535 mask and replaces both
      // with one zero-extending 16-bit load (movzwl) from the same address.
 5184 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5185   match(Set dst (AndI (LoadI mem) mask));
 5186 
 5187   ins_cost(125);
 5188   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 5189   ins_encode %{
 5190     __ movzwl($dst$$Register, $mem$$Address);
 5191   %}
 5192   ins_pipe(ialu_reg_mem);
 5193 %}
 5194 
 5195 // Load Integer into Long Register
      // Folds the ConvI2L into the load: (ConvI2L (LoadI mem)) becomes a single
      // sign-extending movslq.
 5196 instruct loadI2L(rRegL dst, memory mem)
 5197 %{
 5198   match(Set dst (ConvI2L (LoadI mem)));
 5199 
 5200   ins_cost(125);
 5201   format %{ "movslq  $dst, $mem\t# int -> long" %}
 5202 
 5203   ins_encode %{
 5204     __ movslq($dst$$Register, $mem$$Address);
 5205   %}
 5206 
 5207   ins_pipe(ialu_reg_mem);
 5208 %}
 5209 
 5210 // Load Integer with mask 0xFF into Long Register
      // The int load, the AND with the immI_255 mask and the ConvI2L collapse
      // into one zero-extending byte load (movzbq). No ins_cost is given.
 5211 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 5212   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5213 
 5214   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 5215   ins_encode %{
 5216     __ movzbq($dst$$Register, $mem$$Address);
 5217   %}
 5218   ins_pipe(ialu_reg_mem);
 5219 %}
 5220 
 5221 // Load Integer with mask 0xFFFF into Long Register
      // The int load, the AND with the immI_65535 mask and the ConvI2L collapse
      // into one zero-extending 16-bit load (movzwq). No ins_cost is given.
 5222 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 5223   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5224 
 5225   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 5226   ins_encode %{
 5227     __ movzwq($dst$$Register, $mem$$Address);
 5228   %}
 5229   ins_pipe(ialu_reg_mem);
 5230 %}
 5231 
 5232 // Int load ANDed with a 31-bit (immU31) mask, widened to long.
      // Emitted as a 32-bit movl followed by andl with the mask constant taken
      // as-is. The flags register is declared killed because of the andl.
 5233 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 5234   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5235   effect(KILL cr);
 5236 
 5237   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 5238             "andl    $dst, $mask" %}
 5239   ins_encode %{
 5240     Register dst_reg = $dst$$Register;
 5241     __ movl(dst_reg, $mem$$Address);
 5242     __ andl(dst_reg, $mask$$constant);
 5243   %}
 5244   ins_pipe(ialu_reg_mem);
 5245 %}
 5246 
 5247 // Load Unsigned Integer into Long Register
      // Matches an int load widened to long and then ANDed with an immL_32bits
      // mask (i.e. an unsigned int -> long conversion) and emits a plain 32-bit
      // movl; on x86-64 a 32-bit register write zeroes the upper half, so no
      // explicit mask instruction is emitted.
 5248 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 5249 %{
 5250   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5251 
 5252   ins_cost(125);
 5253   format %{ "movl    $dst, $mem\t# uint -> long" %}
 5254 
 5255   ins_encode %{
 5256     __ movl($dst$$Register, $mem$$Address);
 5257   %}
 5258 
 5259   ins_pipe(ialu_reg_mem);
 5260 %}
 5261 
 5262 // Load Long
      // Matches (LoadL mem) and emits one 64-bit movq from $mem into the rRegL
      // destination. The pipeline class is marked XXX (unreviewed) in the
      // original.
 5263 instruct loadL(rRegL dst, memory mem)
 5264 %{
 5265   match(Set dst (LoadL mem));
 5266 
 5267   ins_cost(125);
 5268   format %{ "movq    $dst, $mem\t# long" %}
 5269 
 5270   ins_encode %{
 5271     __ movq($dst$$Register, $mem$$Address);
 5272   %}
 5273 
 5274   ins_pipe(ialu_reg_mem); // XXX
 5275 %}
 5276 
 5277 // Load Range
      // Matches (LoadRange mem) and emits the same 32-bit movl as a plain int
      // load. The cost is marked XXX (unreviewed) in the original.
 5278 instruct loadRange(rRegI dst, memory mem)
 5279 %{
 5280   match(Set dst (LoadRange mem));
 5281 
 5282   ins_cost(125); // XXX
 5283   format %{ "movl    $dst, $mem\t# range" %}
 5284   ins_encode %{
 5285     __ movl($dst$$Register, $mem$$Address);
 5286   %}
 5287   ins_pipe(ialu_reg_mem);
 5288 %}
 5289 
 5290 // Load Pointer
      // Matches (LoadP mem) only when the load node carries no barrier data
      // (barrier_data() == 0) and emits one 64-bit movq. Loads with non-zero
      // barrier data are presumably matched by other rules outside this
      // excerpt (to be confirmed).
 5291 instruct loadP(rRegP dst, memory mem)
 5292 %{
 5293   match(Set dst (LoadP mem));
 5294   predicate(n->as_Load()->barrier_data() == 0);
 5295 
 5296   ins_cost(125); // XXX
 5297   format %{ "movq    $dst, $mem\t# ptr" %}
 5298   ins_encode %{
 5299     __ movq($dst$$Register, $mem$$Address);
 5300   %}
 5301   ins_pipe(ialu_reg_mem); // XXX
 5302 %}
 5303 
 5304 // Load Compressed Pointer
      // Matches (LoadN mem) and emits one 32-bit movl into the rRegN
      // destination; the value is loaded in its compressed form, no decoding
      // happens here.
 5305 instruct loadN(rRegN dst, memory mem)
 5306 %{
 5307    match(Set dst (LoadN mem));
 5308 
 5309    ins_cost(125); // XXX
 5310    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 5311    ins_encode %{
 5312      __ movl($dst$$Register, $mem$$Address);
 5313    %}
 5314    ins_pipe(ialu_reg_mem); // XXX
 5315 %}
 5316 
 5317 
 5318 // Load Klass Pointer
      // Matches (LoadKlass mem) and emits one 64-bit movq into the rRegP
      // destination.
 5319 instruct loadKlass(rRegP dst, memory mem)
 5320 %{
 5321   match(Set dst (LoadKlass mem));
 5322 
 5323   ins_cost(125); // XXX
 5324   format %{ "movq    $dst, $mem\t# class" %}
 5325   ins_encode %{
 5326     __ movq($dst$$Register, $mem$$Address);
 5327   %}
 5328   ins_pipe(ialu_reg_mem); // XXX
 5329 %}
 5330 
 5331 // Load narrow Klass Pointer
      // Matches (LoadNKlass mem) and emits one 32-bit movl into the rRegN
      // destination; the value stays in its compressed form.
 5332 instruct loadNKlass(rRegN dst, memory mem)
 5333 %{
 5334   match(Set dst (LoadNKlass mem));
 5335 
 5336   ins_cost(125); // XXX
 5337   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 5338   ins_encode %{
 5339     __ movl($dst$$Register, $mem$$Address);
 5340   %}
 5341   ins_pipe(ialu_reg_mem); // XXX
 5342 %}
 5343 
 5344 // Load Float
      // Matches (LoadF mem) and emits via the movflt macro-assembler helper
      // into an XMM register. Costed slightly higher (145) than the integer
      // loads (125) and scheduled with the generic pipe_slow class; both are
      // marked XXX (unreviewed) in the original.
 5345 instruct loadF(regF dst, memory mem)
 5346 %{
 5347   match(Set dst (LoadF mem));
 5348 
 5349   ins_cost(145); // XXX
 5350   format %{ "movss   $dst, $mem\t# float" %}
 5351   ins_encode %{
 5352     __ movflt($dst$$XMMRegister, $mem$$Address);
 5353   %}
 5354   ins_pipe(pipe_slow); // XXX
 5355 %}
 5356 
 5357 // Load Double (variant selected when UseXmmLoadAndClearUpper is off)
      // Emits through the same movdbl helper as the loadD rule; only the
      // predicate and the printed mnemonic (movlpd) differ in this file.
      // Presumably movdbl itself chooses the machine instruction from the
      // flag -- confirm in the macro assembler.
 5358 instruct loadD_partial(regD dst, memory mem)
 5359 %{
 5360   predicate(!UseXmmLoadAndClearUpper);
 5361   match(Set dst (LoadD mem));
 5362 
 5363   ins_cost(145); // XXX
 5364   format %{ "movlpd  $dst, $mem\t# double" %}
 5365   ins_encode %{
 5366     __ movdbl($dst$$XMMRegister, $mem$$Address);
 5367   %}
 5368   ins_pipe(pipe_slow); // XXX
 5369 %}
 5370 
 5371 instruct loadD(regD dst, memory mem)
      // Load Double (variant selected when UseXmmLoadAndClearUpper is on).
      // Emits through the same movdbl helper as loadD_partial; only the
      // predicate and the printed mnemonic (movsd) differ in this file.
      // Presumably movdbl itself chooses the machine instruction from the
      // flag -- confirm in the macro assembler.
 5372 %{
 5373   predicate(UseXmmLoadAndClearUpper);
 5374   match(Set dst (LoadD mem));
 5375 
 5376   ins_cost(145); // XXX
 5377   format %{ "movsd   $dst, $mem\t# double" %}
 5378   ins_encode %{
 5379     __ movdbl($dst$$XMMRegister, $mem$$Address);
 5380   %}
 5381   ins_pipe(pipe_slow); // XXX
 5382 %}
 5383 
 5384 
 5385 // Following pseudo code describes the algorithm for max[FD]:
 5386 // Min algorithm is on similar lines
 5387 //  btmp = (b < +0.0) ? a : b
 5388 //  atmp = (b < +0.0) ? b : a
 5389 //  Tmp  = Max_Float(atmp , btmp)
 5390 //  Res  = (atmp == NaN) ? atmp : Tmp
 5391 
 5392 // java.lang.Math.max(float a, float b), AVX, non-reduction case.
      // Emits the five-instruction blend/max/compare/blend sequence shown in
      // the format string, always at 128-bit vector length. Statement order is
      // significant: btmp is an input of vmaxss and is afterwards overwritten
      // with the vcmpps result that selects between tmp and atmp in the last
      // blend. a and b are only read; tmp, atmp and btmp are TEMPs.
 5393 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 5394   predicate(UseAVX > 0 && !n->is_reduction());
 5395   match(Set dst (MaxF a b));
 5396   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5397   format %{
 5398      "vblendvps        $btmp,$b,$a,$b           \n\t"
 5399      "vblendvps        $atmp,$a,$b,$b           \n\t"
 5400      "vmaxss           $tmp,$atmp,$btmp         \n\t"
 5401      "vcmpps.unordered $btmp,$atmp,$atmp        \n\t"
 5402      "vblendvps        $dst,$tmp,$atmp,$btmp    \n\t"
 5403   %}
 5404   ins_encode %{
 5405     int vlen_enc = Assembler::AVX_128bit;
 5406     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
 5407     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vlen_enc);
 5408     __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5409     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vlen_enc);
 5410     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
 5411  %}
 5412   ins_pipe( pipe_slow );
 5413 %}
 5414 
 5415 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
      // Float max for the reduction case (n->is_reduction()), the complement
      // of the maxF_reg predicate. Delegates to emit_fp_min_max (defined
      // outside this excerpt) with min=false, single=true. Needs one XMM temp
      // and one integer temp, and declares the flags register killed.
 5416   predicate(UseAVX > 0 && n->is_reduction());
 5417   match(Set dst (MaxF a b));
 5418   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5419 
 5420   format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
 5421   ins_encode %{
 5422     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5423                     false /*min*/, true /*single*/);
 5424   %}
 5425   ins_pipe( pipe_slow );
 5426 %}
 5427 
 5428 // java.lang.Math.max(double a, double b), AVX, non-reduction case.
      // Double-precision twin of the float rule: the same five-step
      // blend/max/compare/blend sequence using the pd/sd instruction forms at
      // 128-bit vector length. Statement order is significant because btmp is
      // consumed by vmaxsd before being overwritten by vcmppd.
 5429 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 5430   predicate(UseAVX > 0 && !n->is_reduction());
 5431   match(Set dst (MaxD a b));
 5432   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 5433   format %{
 5434      "vblendvpd        $btmp,$b,$a,$b            \n\t"
 5435      "vblendvpd        $atmp,$a,$b,$b            \n\t"
 5436      "vmaxsd           $tmp,$atmp,$btmp          \n\t"
 5437      "vcmppd.unordered $btmp,$atmp,$atmp         \n\t"
 5438      "vblendvpd        $dst,$tmp,$atmp,$btmp     \n\t"
 5439   %}
 5440   ins_encode %{
 5441     int vlen_enc = Assembler::AVX_128bit;
 5442     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
 5443     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vlen_enc);
 5444     __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5445     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vlen_enc);
 5446     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
 5447   %}
 5448   ins_pipe( pipe_slow );
 5449 %}
 5450 
 5451 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
      // Double max for the reduction case (n->is_reduction()), the complement
      // of the maxD_reg predicate. Delegates to emit_fp_min_max (defined
      // outside this excerpt) with min=false, single=false. The integer temp
      // is a long register here (rRegL), and the flags register is killed.
 5452   predicate(UseAVX > 0 && n->is_reduction());
 5453   match(Set dst (MaxD a b));
 5454   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5455 
 5456   format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
 5457   ins_encode %{
 5458     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5459                     false /*min*/, false /*single*/);
 5460   %}
 5461   ins_pipe( pipe_slow );
 5462 %}
 5463 
 5464 // java.lang.Math.min(float a, float b), AVX, non-reduction case.
      // Mirror image of the max rule: the two initial blends are keyed on a
      // (not b) and atmp is produced first, then vminss, an unordered
      // self-compare of atmp into btmp, and a final blend into dst. Statement
      // order is significant because btmp is consumed by vminss before being
      // overwritten by vcmpps.
 5465 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 5466   predicate(UseAVX > 0 && !n->is_reduction());
 5467   match(Set dst (MinF a b));
 5468   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5469   format %{
 5470      "vblendvps        $atmp,$a,$b,$a             \n\t"
 5471      "vblendvps        $btmp,$b,$a,$a             \n\t"
 5472      "vminss           $tmp,$atmp,$btmp           \n\t"
 5473      "vcmpps.unordered $btmp,$atmp,$atmp          \n\t"
 5474      "vblendvps        $dst,$tmp,$atmp,$btmp      \n\t"
 5475   %}
 5476   ins_encode %{
 5477     int vlen_enc = Assembler::AVX_128bit;
 5478     __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vlen_enc);
 5479     __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vlen_enc);
 5480     __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5481     __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vlen_enc);
 5482     __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
 5483   %}
 5484   ins_pipe( pipe_slow );
 5485 %}
 5486 
 5487 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
      // Float min for the reduction case (n->is_reduction()), the complement
      // of the minF_reg predicate. Delegates to emit_fp_min_max (defined
      // outside this excerpt) with min=true, single=true. Needs one XMM temp
      // and one integer temp, and declares the flags register killed.
 5488   predicate(UseAVX > 0 && n->is_reduction());
 5489   match(Set dst (MinF a b));
 5490   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5491 
 5492   format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
 5493   ins_encode %{
 5494     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5495                     true /*min*/, true /*single*/);
 5496   %}
 5497   ins_pipe( pipe_slow );
 5498 %}
 5499 
 5500 // java.lang.Math.min(double a, double b), AVX, non-reduction case.
      // Double-precision twin of the float min rule: initial blends keyed on
      // a, then vminsd, an unordered self-compare of atmp into btmp, and a
      // final blend into dst, all at 128-bit vector length. Statement order is
      // significant because btmp is consumed by vminsd before being
      // overwritten by vcmppd.
 5501 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 5502   predicate(UseAVX > 0 && !n->is_reduction());
 5503   match(Set dst (MinD a b));
 5504   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 5505   format %{
 5506      "vblendvpd        $atmp,$a,$b,$a           \n\t"
 5507      "vblendvpd        $btmp,$b,$a,$a           \n\t"
 5508      "vminsd           $tmp,$atmp,$btmp         \n\t"
 5509      "vcmppd.unordered $btmp,$atmp,$atmp        \n\t"
 5510      "vblendvpd        $dst,$tmp,$atmp,$btmp    \n\t"
 5511   %}
 5512   ins_encode %{
 5513     int vlen_enc = Assembler::AVX_128bit;
 5514     __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vlen_enc);
 5515     __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vlen_enc);
 5516     __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
 5517     __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vlen_enc);
 5518     __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
 5519   %}
 5520   ins_pipe( pipe_slow );
 5521 %}
 5522 
 5523 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
      // Double min for the reduction case (n->is_reduction()), the complement
      // of the minD_reg predicate. Delegates to emit_fp_min_max (defined
      // outside this excerpt) with min=true, single=false. The integer temp is
      // a long register here (rRegL), and the flags register is killed.
 5524   predicate(UseAVX > 0 && n->is_reduction());
 5525   match(Set dst (MinD a b));
 5526   effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);
 5527 
 5528   format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
 5529   ins_encode %{
 5530     emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
 5531                     true /*min*/, false /*single*/);
 5532   %}
 5533   ins_pipe( pipe_slow );
 5534 %}
 5535 
 5536 // Load Effective Address
      // Materializes the address described by an indOffset8 memory operand into
      // a pointer register with a single leaq. The match rule is the operand
      // itself (Set dst mem); nothing is loaded from memory.
 5537 instruct leaP8(rRegP dst, indOffset8 mem)
 5538 %{
 5539   match(Set dst mem);
 5540 
 5541   ins_cost(110); // XXX
 5542   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 5543   ins_encode %{
 5544     __ leaq($dst$$Register, $mem$$Address);
 5545   %}
 5546   ins_pipe(ialu_reg_reg_fat);
 5547 %}
 5548 
 5549 instruct leaP32(rRegP dst, indOffset32 mem)
      // Load Effective Address for an indOffset32 operand: computes the address
      // into dst with a single leaq; nothing is loaded from memory.
 5550 %{
 5551   match(Set dst mem);
 5552 
 5553   ins_cost(110);
 5554   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 5555   ins_encode %{
 5556     __ leaq($dst$$Register, $mem$$Address);
 5557   %}
 5558   ins_pipe(ialu_reg_reg_fat);
 5559 %}
 5560 
 5561 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
      // Load Effective Address for an indIndexOffset operand: computes the
      // address into dst with a single leaq; nothing is loaded from memory.
 5562 %{
 5563   match(Set dst mem);
 5564 
 5565   ins_cost(110);
 5566   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 5567   ins_encode %{
 5568     __ leaq($dst$$Register, $mem$$Address);
 5569   %}
 5570   ins_pipe(ialu_reg_reg_fat);
 5571 %}
 5572 
 5573 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
      // Load Effective Address for an indIndexScale operand: computes the
      // address into dst with a single leaq; nothing is loaded from memory.
 5574 %{
 5575   match(Set dst mem);
 5576 
 5577   ins_cost(110);
 5578   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 5579   ins_encode %{
 5580     __ leaq($dst$$Register, $mem$$Address);
 5581   %}
 5582   ins_pipe(ialu_reg_reg_fat);
 5583 %}
 5584 
 5585 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
      // Load Effective Address for an indPosIndexScale operand: computes the
      // address into dst with a single leaq; nothing is loaded from memory.
      // The format tag reads "posidxscale" so that printed assembly can tell
      // this rule apart from leaPIdxScale, in line with the "posidxoff" and
      // "posidxscaleoff" tags of the sibling Pos rules.
 5586 %{
 5587   match(Set dst mem);
 5588 
 5589   ins_cost(110);
 5590   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
 5591   ins_encode %{
 5592     __ leaq($dst$$Register, $mem$$Address);
 5593   %}
 5594   ins_pipe(ialu_reg_reg_fat);
 5595 %}
 5596 
 5597 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
      // Load Effective Address for an indIndexScaleOffset operand: computes the
      // address into dst with a single leaq; nothing is loaded from memory.
 5598 %{
 5599   match(Set dst mem);
 5600 
 5601   ins_cost(110);
 5602   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 5603   ins_encode %{
 5604     __ leaq($dst$$Register, $mem$$Address);
 5605   %}
 5606   ins_pipe(ialu_reg_reg_fat);
 5607 %}
 5608 
 5609 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
      // Load Effective Address for an indPosIndexOffset operand: computes the
      // address into dst with a single leaq; nothing is loaded from memory.
 5610 %{
 5611   match(Set dst mem);
 5612 
 5613   ins_cost(110);
 5614   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 5615   ins_encode %{
 5616     __ leaq($dst$$Register, $mem$$Address);
 5617   %}
 5618   ins_pipe(ialu_reg_reg_fat);
 5619 %}
 5620 
 5621 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
      // Load Effective Address for an indPosIndexScaleOffset operand: computes
      // the address into dst with a single leaq; nothing is loaded from memory.
 5622 %{
 5623   match(Set dst mem);
 5624 
 5625   ins_cost(110);
 5626   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 5627   ins_encode %{
 5628     __ leaq($dst$$Register, $mem$$Address);
 5629   %}
 5630   ins_pipe(ialu_reg_reg_fat);
 5631 %}
 5632 
 5633 // Load Effective Address which uses Narrow (32-bits) oop
      // Only selected when compressed oops are in use with a non-zero shift
      // (the complement of the shift() == 0 predicate used by the *Narrow lea
      // rules). Computes the address of an indCompressedOopOffset operand into
      // dst with a single leaq.
 5634 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 5635 %{
 5636   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 5637   match(Set dst mem);
 5638 
 5639   ins_cost(110);
 5640   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 5641   ins_encode %{
 5642     __ leaq($dst$$Register, $mem$$Address);
 5643   %}
 5644   ins_pipe(ialu_reg_reg_fat);
 5645 %}
 5646 
 5647 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
      // Load Effective Address for an indOffset8Narrow operand. Only selected
      // when CompressedOops::shift() is 0; computes the address into dst with
      // a single leaq.
 5648 %{
 5649   predicate(CompressedOops::shift() == 0);
 5650   match(Set dst mem);
 5651 
 5652   ins_cost(110); // XXX
 5653   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 5654   ins_encode %{
 5655     __ leaq($dst$$Register, $mem$$Address);
 5656   %}
 5657   ins_pipe(ialu_reg_reg_fat);
 5658 %}
 5659 
 5660 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 5661 %{
        // leaq of an indOffset32Narrow address operand into pointer register dst.
        // Only selectable when the compressed-oop shift is zero.
 5662   predicate(CompressedOops::shift() == 0);
 5663   match(Set dst mem);
 5664 
 5665   ins_cost(110);
 5666   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 5667   ins_encode %{
 5668     __ leaq($dst$$Register, $mem$$Address);
 5669   %}
 5670   ins_pipe(ialu_reg_reg_fat);
 5671 %}
 5672 
 5673 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 5674 %{
        // leaq of an indIndexOffsetNarrow address operand into pointer register dst.
        // Only selectable when the compressed-oop shift is zero.
 5675   predicate(CompressedOops::shift() == 0);
 5676   match(Set dst mem);
 5677 
 5678   ins_cost(110);
 5679   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 5680   ins_encode %{
 5681     __ leaq($dst$$Register, $mem$$Address);
 5682   %}
 5683   ins_pipe(ialu_reg_reg_fat);
 5684 %}
 5685 
 5686 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 5687 %{
        // leaq of an indIndexScaleNarrow address operand into pointer register dst.
        // Only selectable when the compressed-oop shift is zero.
 5688   predicate(CompressedOops::shift() == 0);
 5689   match(Set dst mem);
 5690 
 5691   ins_cost(110);
 5692   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 5693   ins_encode %{
 5694     __ leaq($dst$$Register, $mem$$Address);
 5695   %}
 5696   ins_pipe(ialu_reg_reg_fat);
 5697 %}
 5698 
 5699 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 5700 %{
        // leaq of an indIndexScaleOffsetNarrow address operand into pointer register dst.
        // Only selectable when the compressed-oop shift is zero.
 5701   predicate(CompressedOops::shift() == 0);
 5702   match(Set dst mem);
 5703 
 5704   ins_cost(110);
 5705   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 5706   ins_encode %{
 5707     __ leaq($dst$$Register, $mem$$Address);
 5708   %}
 5709   ins_pipe(ialu_reg_reg_fat);
 5710 %}
 5711 
 5712 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 5713 %{
        // leaq of an indPosIndexOffsetNarrow address operand into pointer register dst.
        // Only selectable when the compressed-oop shift is zero.
 5714   predicate(CompressedOops::shift() == 0);
 5715   match(Set dst mem);
 5716 
 5717   ins_cost(110);
 5718   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 5719   ins_encode %{
 5720     __ leaq($dst$$Register, $mem$$Address);
 5721   %}
 5722   ins_pipe(ialu_reg_reg_fat);
 5723 %}
 5724 
 5725 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 5726 %{
        // leaq of an indPosIndexScaleOffsetNarrow address operand into pointer register dst.
        // Only selectable when the compressed-oop shift is zero.
 5727   predicate(CompressedOops::shift() == 0);
 5728   match(Set dst mem);
 5729 
 5730   ins_cost(110);
 5731   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 5732   ins_encode %{
 5733     __ leaq($dst$$Register, $mem$$Address);
 5734   %}
 5735   ins_pipe(ialu_reg_reg_fat);
 5736 %}
 5737 
 5738 instruct loadConI(rRegI dst, immI src)
 5739 %{
        // Loads an arbitrary int immediate into int register dst with movl.
        // No ins_cost is given here, so the ADLC default cost applies.
 5740   match(Set dst src);
 5741 
 5742   format %{ "movl    $dst, $src\t# int" %}
 5743   ins_encode %{
 5744     __ movl($dst$$Register, $src$$constant);
 5745   %}
 5746   ins_pipe(ialu_reg_fat); // XXX
 5747 %}
 5748 
 5749 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 5750 %{
        // Loads the int constant 0 by xor-ing dst with itself instead of moving
        // an immediate.
 5751   match(Set dst src);
 5752   effect(KILL cr); // the flags register is declared clobbered by this instruction
 5753 
 5754   ins_cost(50);
 5755   format %{ "xorl    $dst, $dst\t# int" %}
 5756   ins_encode %{
 5757     __ xorl($dst$$Register, $dst$$Register);
 5758   %}
 5759   ins_pipe(ialu_reg);
 5760 %}
 5761 
 5762 instruct loadConL(rRegL dst, immL src)
 5763 %{
        // General long-constant load: emits mov64 with the full immediate.
        // Carries the highest cost (150) of the long-constant loads in this group
        // (loadConL0=50, loadConUL32=60, loadConL32=70).
 5764   match(Set dst src);
 5765 
 5766   ins_cost(150);
 5767   format %{ "movq    $dst, $src\t# long" %}
 5768   ins_encode %{
 5769     __ mov64($dst$$Register, $src$$constant);
 5770   %}
 5771   ins_pipe(ialu_reg);
 5772 %}
 5773 
 5774 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 5775 %{
        // Loads the long constant 0 by xor-ing dst with itself.
        // NOTE(review): the 32-bit xorl form is used on a long register; this relies
        // on x86-64 zero-extending 32-bit register writes to 64 bits.
 5776   match(Set dst src);
 5777   effect(KILL cr); // the flags register is declared clobbered by this instruction
 5778 
 5779   ins_cost(50);
 5780   format %{ "xorl    $dst, $dst\t# long" %}
 5781   ins_encode %{
 5782     __ xorl($dst$$Register, $dst$$Register);
 5783   %}
 5784   ins_pipe(ialu_reg); // XXX
 5785 %}
 5786 
 5787 instruct loadConUL32(rRegL dst, immUL32 src)
 5788 %{
        // Loads a long constant matching immUL32 (per the format text, an unsigned
        // 32-bit value) with a 32-bit movl.
        // NOTE(review): relies on x86-64 zero-extending 32-bit register writes.
 5789   match(Set dst src);
 5790 
 5791   ins_cost(60);
 5792   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 5793   ins_encode %{
 5794     __ movl($dst$$Register, $src$$constant);
 5795   %}
 5796   ins_pipe(ialu_reg);
 5797 %}
 5798 
 5799 instruct loadConL32(rRegL dst, immL32 src)
 5800 %{
        // Loads a long constant matching immL32 (per the format text, a 32-bit
        // value) with movq and an immediate operand.
 5801   match(Set dst src);
 5802 
 5803   ins_cost(70);
 5804   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 5805   ins_encode %{
 5806     __ movq($dst$$Register, $src$$constant);
 5807   %}
 5808   ins_pipe(ialu_reg);
 5809 %}
 5810 
 5811 instruct loadConP(rRegP dst, immP con) %{
        // General pointer-constant load: emits mov64 of the constant and passes the
        // constant's relocation (constant_reloc()) with the RELOC_IMM64 format.
 5812   match(Set dst con);
 5813 
 5814   format %{ "movq    $dst, $con\t# ptr" %}
 5815   ins_encode %{
 5816     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 5817   %}
 5818   ins_pipe(ialu_reg_fat); // XXX
 5819 %}
 5820 
 5821 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 5822 %{
        // Loads the NULL pointer constant by xor-ing dst with itself.
        // NOTE(review): the 32-bit xorl form is used on a pointer register; this
        // relies on x86-64 zero-extending 32-bit register writes to 64 bits.
 5823   match(Set dst src);
 5824   effect(KILL cr); // the flags register is declared clobbered by this instruction
 5825 
 5826   ins_cost(50);
 5827   format %{ "xorl    $dst, $dst\t# ptr" %}
 5828   ins_encode %{
 5829     __ xorl($dst$$Register, $dst$$Register);
 5830   %}
 5831   ins_pipe(ialu_reg);
 5832 %}
 5833 
 5834 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 5835 %{
        // Loads a pointer constant matching immP31 (per the format text, a positive
        // 32-bit value) with a 32-bit movl.
 5836   match(Set dst src);
        // NOTE(review): loadConUL32 emits the same movl without declaring cr killed;
        // this KILL may be more conservative than needed -- confirm before changing.
 5837   effect(KILL cr);
 5838 
 5839   ins_cost(60);
 5840   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 5841   ins_encode %{
 5842     __ movl($dst$$Register, $src$$constant);
 5843   %}
 5844   ins_pipe(ialu_reg);
 5845 %}
 5846 
 5847 instruct loadConF(regF dst, immF con) %{
        // Loads a float constant into XMM register dst from the constant table:
        // movflt reads the entry addressed by $constantaddress($con).
 5848   match(Set dst con);
 5849   ins_cost(125);
 5850   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 5851   ins_encode %{
 5852     __ movflt($dst$$XMMRegister, $constantaddress($con));
 5853   %}
 5854   ins_pipe(pipe_slow);
 5855 %}
 5856 
 5857 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
        // Loads the compressed NULL pointer constant by xor-ing dst with itself.
        // The format string prints "$dst, $dst" to match the instruction actually
        // emitted below (both xorq operands are dst; src is never encoded).
 5858   match(Set dst src);
 5859   effect(KILL cr); // the flags register is declared clobbered by this instruction
 5860   format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
 5861   ins_encode %{
 5862     __ xorq($dst$$Register, $dst$$Register);
 5863   %}
 5864   ins_pipe(ialu_reg);
 5865 %}
 5866 
 5867 instruct loadConN(rRegN dst, immN src) %{
        // Loads a non-NULL compressed oop constant into narrow register dst via
        // set_narrow_oop.
 5868   match(Set dst src);
 5869 
 5870   ins_cost(125);
 5871   format %{ "movl    $dst, $src\t# compressed ptr" %}
 5872   ins_encode %{
 5873     address con = (address)$src$$constant;
 5874     if (con == NULL) {
            // A NULL constant is not expected to reach this rule (loadConN0 handles
            // immN0), so it is treated as a fatal error.
 5875       ShouldNotReachHere();
 5876     } else {
 5877       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 5878     }
 5879   %}
 5880   ins_pipe(ialu_reg_fat); // XXX
 5881 %}
 5882 
 5883 instruct loadConNKlass(rRegN dst, immNKlass src) %{
        // Loads a non-NULL compressed klass pointer constant into narrow register
        // dst via set_narrow_klass.
 5884   match(Set dst src);
 5885 
 5886   ins_cost(125);
 5887   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 5888   ins_encode %{
 5889     address con = (address)$src$$constant;
 5890     if (con == NULL) {
            // A NULL klass constant is treated as a fatal error.
 5891       ShouldNotReachHere();
 5892     } else {
 5893       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 5894     }
 5895   %}
 5896   ins_pipe(ialu_reg_fat); // XXX
 5897 %}
 5898 
 5899 instruct loadConF0(regF dst, immF0 src)
 5900 %{
        // Loads float 0.0 by xor-ing the XMM register with itself (xorps), avoiding
        // the constant-table load used by loadConF (cost 100 vs. 125).
 5901   match(Set dst src);
 5902   ins_cost(100);
 5903 
 5904   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 5905   ins_encode %{
 5906     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 5907   %}
 5908   ins_pipe(pipe_slow);
 5909 %}
 5910 
 5911 // Use the same format since predicate() can not be used here.
 5912 instruct loadConD(regD dst, immD con) %{
        // Loads a double constant into XMM register dst from the constant table:
        // movdbl reads the entry addressed by $constantaddress($con).
 5913   match(Set dst con);
 5914   ins_cost(125);
 5915   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 5916   ins_encode %{
 5917     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 5918   %}
 5919   ins_pipe(pipe_slow);
 5920 %}
 5921 
 5922 instruct loadConD0(regD dst, immD0 src)
 5923 %{
        // Loads double 0.0 by xor-ing the XMM register with itself (xorpd), avoiding
        // the constant-table load used by loadConD (cost 100 vs. 125).
 5924   match(Set dst src);
 5925   ins_cost(100);
 5926 
 5927   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 5928   ins_encode %{
 5929     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 5930   %}
 5931   ins_pipe(pipe_slow);
 5932 %}
 5933 
 5934 instruct loadSSI(rRegI dst, stackSlotI src)
 5935 %{
        // Loads an int from a stack slot into int register dst.
        // Encoded from raw pieces rather than a MacroAssembler call: a REX prefix
        // (REX_reg_mem), primary opcode 0x8B, then the reg/mem operand bytes.
 5936   match(Set dst src);
 5937 
 5938   ins_cost(125);
 5939   format %{ "movl    $dst, $src\t# int stk" %}
 5940   opcode(0x8B);
 5941   ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
 5942   ins_pipe(ialu_reg_mem);
 5943 %}
 5944 
 5945 instruct loadSSL(rRegL dst, stackSlotL src)
 5946 %{
        // Loads a long from a stack slot into long register dst.
        // Same opcode 0x8B as loadSSI, but with the wide REX prefix encoding
        // (REX_reg_mem_wide) for the 64-bit form.
 5947   match(Set dst src);
 5948 
 5949   ins_cost(125);
 5950   format %{ "movq    $dst, $src\t# long stk" %}
 5951   opcode(0x8B);
 5952   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
 5953   ins_pipe(ialu_reg_mem);
 5954 %}
 5955 
 5956 instruct loadSSP(rRegP dst, stackSlotP src)
 5957 %{
        // Loads a pointer from a stack slot into pointer register dst.
        // Uses the same encoding as loadSSL: wide REX prefix, opcode 0x8B, reg/mem.
 5958   match(Set dst src);
 5959 
 5960   ins_cost(125);
 5961   format %{ "movq    $dst, $src\t# ptr stk" %}
 5962   opcode(0x8B);
 5963   ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
 5964   ins_pipe(ialu_reg_mem);
 5965 %}
 5966 
 5967 instruct loadSSF(regF dst, stackSlotF src)
 5968 %{
        // Loads a float from a stack slot into XMM register dst.
        // The slot is addressed as rsp + the operand's displacement.
 5969   match(Set dst src);
 5970 
 5971   ins_cost(125);
 5972   format %{ "movss   $dst, $src\t# float stk" %}
 5973   ins_encode %{
 5974     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 5975   %}
 5976   ins_pipe(pipe_slow); // XXX
 5977 %}
 5978 
 5979 // Use the same format since predicate() can not be used here.
 5980 instruct loadSSD(regD dst, stackSlotD src)
 5981 %{
        // Loads a double from a stack slot into XMM register dst.
        // The slot is addressed as rsp + the operand's displacement.
 5982   match(Set dst src);
 5983 
 5984   ins_cost(125);
 5985   format %{ "movsd   $dst, $src\t# double stk" %}
 5986   ins_encode  %{
 5987     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 5988   %}
 5989   ins_pipe(pipe_slow); // XXX
 5990 %}
 5991 
 5992 // Prefetch instructions for allocation.
 5993 // Must be safe to execute with invalid address (cannot fault).
 5994 
 5995 instruct prefetchAlloc( memory mem ) %{
        // Allocation prefetch variant selected when AllocatePrefetchInstr == 3:
        // emits prefetchw on the given address.
 5996   predicate(AllocatePrefetchInstr==3);
 5997   match(PrefetchAllocation mem);
 5998   ins_cost(125);
 5999 
 6000   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 6001   ins_encode %{
 6002     __ prefetchw($mem$$Address);
 6003   %}
 6004   ins_pipe(ialu_mem);
 6005 %}
 6006 
 6007 instruct prefetchAllocNTA( memory mem ) %{
        // Allocation prefetch variant selected when AllocatePrefetchInstr == 0:
        // emits prefetchnta on the given address.
 6008   predicate(AllocatePrefetchInstr==0);
 6009   match(PrefetchAllocation mem);
 6010   ins_cost(125);
 6011 
 6012   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 6013   ins_encode %{
 6014     __ prefetchnta($mem$$Address);
 6015   %}
 6016   ins_pipe(ialu_mem);
 6017 %}
 6018 
 6019 instruct prefetchAllocT0( memory mem ) %{
        // Allocation prefetch variant selected when AllocatePrefetchInstr == 1:
        // emits prefetcht0 on the given address.
 6020   predicate(AllocatePrefetchInstr==1);
 6021   match(PrefetchAllocation mem);
 6022   ins_cost(125);
 6023 
 6024   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 6025   ins_encode %{
 6026     __ prefetcht0($mem$$Address);
 6027   %}
 6028   ins_pipe(ialu_mem);
 6029 %}
 6030 
 6031 instruct prefetchAllocT2( memory mem ) %{
        // Allocation prefetch variant selected when AllocatePrefetchInstr == 2:
        // emits prefetcht2 on the given address.
 6032   predicate(AllocatePrefetchInstr==2);
 6033   match(PrefetchAllocation mem);
 6034   ins_cost(125);
 6035 
 6036   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 6037   ins_encode %{
 6038     __ prefetcht2($mem$$Address);
 6039   %}
 6040   ins_pipe(ialu_mem);
 6041 %}
 6042 
 6043 //----------Store Instructions-------------------------------------------------
 6044 
 6045 // Store Byte
 6046 instruct storeB(memory mem, rRegI src)
 6047 %{
        // Stores a byte from int register src to memory with movb (StoreB node).
 6048   match(Set mem (StoreB mem src));
 6049 
 6050   ins_cost(125); // XXX
 6051   format %{ "movb    $mem, $src\t# byte" %}
 6052   ins_encode %{
 6053     __ movb($mem$$Address, $src$$Register);
 6054   %}
 6055   ins_pipe(ialu_mem_reg);
 6056 %}
 6057 
 6058 // Store Char/Short
 6059 instruct storeC(memory mem, rRegI src)
 6060 %{
        // Stores a 16-bit char/short from int register src to memory with movw
        // (StoreC node).
 6061   match(Set mem (StoreC mem src));
 6062 
 6063   ins_cost(125); // XXX
 6064   format %{ "movw    $mem, $src\t# char/short" %}
 6065   ins_encode %{
 6066     __ movw($mem$$Address, $src$$Register);
 6067   %}
 6068   ins_pipe(ialu_mem_reg);
 6069 %}
 6070 
 6071 // Store Integer
 6072 instruct storeI(memory mem, rRegI src)
 6073 %{
        // Stores an int from register src to memory with movl (StoreI node).
 6074   match(Set mem (StoreI mem src));
 6075 
 6076   ins_cost(125); // XXX
 6077   format %{ "movl    $mem, $src\t# int" %}
 6078   ins_encode %{
 6079     __ movl($mem$$Address, $src$$Register);
 6080   %}
 6081   ins_pipe(ialu_mem_reg);
 6082 %}
 6083 
 6084 // Store Long
 6085 instruct storeL(memory mem, rRegL src)
 6086 %{
        // Stores a long from register src to memory with movq (StoreL node).
 6087   match(Set mem (StoreL mem src));
 6088 
 6089   ins_cost(125); // XXX
 6090   format %{ "movq    $mem, $src\t# long" %}
 6091   ins_encode %{
 6092     __ movq($mem$$Address, $src$$Register);
 6093   %}
 6094   ins_pipe(ialu_mem_reg); // XXX
 6095 %}
 6096 
 6097 // Store Pointer
 6098 instruct storeP(memory mem, any_RegP src)
 6099 %{
        // Stores a full-width pointer from register src to memory with movq
        // (StoreP node). src uses the any_RegP operand class (defined elsewhere).
 6100   match(Set mem (StoreP mem src));
 6101 
 6102   ins_cost(125); // XXX
 6103   format %{ "movq    $mem, $src\t# ptr" %}
 6104   ins_encode %{
 6105     __ movq($mem$$Address, $src$$Register);
 6106   %}
 6107   ins_pipe(ialu_mem_reg);
 6108 %}
 6109 
 6110 instruct storeImmP0(memory mem, immP0 zero)
 6111 %{
        // Stores a NULL pointer by writing r12 instead of an immediate.
        // Only selectable with compressed oops enabled and a NULL heap base; per the
        // format text, r12 (the heap-base register) then holds zero.
 6112   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6113   match(Set mem (StoreP mem zero));
 6114 
 6115   ins_cost(125); // XXX
 6116   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 6117   ins_encode %{
 6118     __ movq($mem$$Address, r12);
 6119   %}
 6120   ins_pipe(ialu_mem_reg);
 6121 %}
 6122 
 6123 // Store NULL Pointer, mark word, or other simple pointer constant.
 6124 instruct storeImmP(memory mem, immP31 src)
 6125 %{
        // Stores a pointer constant matching immP31 directly from an immediate
        // with movq. Costed above the register/r12 stores (150 vs. 125).
 6126   match(Set mem (StoreP mem src));
 6127 
 6128   ins_cost(150); // XXX
 6129   format %{ "movq    $mem, $src\t# ptr" %}
 6130   ins_encode %{
 6131     __ movq($mem$$Address, $src$$constant);
 6132   %}
 6133   ins_pipe(ialu_mem_imm);
 6134 %}
 6135 
 6136 // Store Compressed Pointer
 6137 instruct storeN(memory mem, rRegN src)
 6138 %{
        // Stores a compressed (narrow) oop from register src to memory with a
        // 32-bit movl (StoreN node).
 6139   match(Set mem (StoreN mem src));
 6140 
 6141   ins_cost(125); // XXX
 6142   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6143   ins_encode %{
 6144     __ movl($mem$$Address, $src$$Register);
 6145   %}
 6146   ins_pipe(ialu_mem_reg);
 6147 %}
 6148 
 6149 instruct storeNKlass(memory mem, rRegN src)
 6150 %{
        // Stores a compressed klass pointer from register src to memory with a
        // 32-bit movl (StoreNKlass node).
 6151   match(Set mem (StoreNKlass mem src));
 6152 
 6153   ins_cost(125); // XXX
 6154   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6155   ins_encode %{
 6156     __ movl($mem$$Address, $src$$Register);
 6157   %}
 6158   ins_pipe(ialu_mem_reg);
 6159 %}
 6160 
 6161 instruct storeImmN0(memory mem, immN0 zero)
 6162 %{
        // Stores a compressed NULL by writing the low 32 bits of r12.
        // Only selectable when the compressed-oop base is NULL; per the format text,
        // r12 (the heap-base register) then holds zero. Unlike the sibling r12-based
        // stores, this predicate does not test UseCompressedOops.
 6163   predicate(CompressedOops::base() == NULL);
 6164   match(Set mem (StoreN mem zero));
 6165 
 6166   ins_cost(125); // XXX
 6167   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 6168   ins_encode %{
 6169     __ movl($mem$$Address, r12);
 6170   %}
 6171   ins_pipe(ialu_mem_reg);
 6172 %}
 6173 
 6174 instruct storeImmN(memory mem, immN src)
 6175 %{
        // Stores a compressed oop constant directly to memory.
 6176   match(Set mem (StoreN mem src));
 6177 
 6178   ins_cost(150); // XXX
 6179   format %{ "movl    $mem, $src\t# compressed ptr" %}
 6180   ins_encode %{
 6181     address con = (address)$src$$constant;
 6182     if (con == NULL) {
            // NULL constant: store a plain 32-bit zero.
 6183       __ movl($mem$$Address, 0);
 6184     } else {
            // Non-NULL oop constant: delegate to set_narrow_oop.
 6185       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 6186     }
 6187   %}
 6188   ins_pipe(ialu_mem_imm);
 6189 %}
 6190 
 6191 instruct storeImmNKlass(memory mem, immNKlass src)
 6192 %{
        // Stores a compressed klass pointer constant directly to memory via
        // set_narrow_klass. Unlike storeImmN, there is no NULL special case here.
 6193   match(Set mem (StoreNKlass mem src));
 6194 
 6195   ins_cost(150); // XXX
 6196   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 6197   ins_encode %{
 6198     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 6199   %}
 6200   ins_pipe(ialu_mem_imm);
 6201 %}
 6202 
 6203 // Store Integer Immediate
 6204 instruct storeImmI0(memory mem, immI_0 zero)
 6205 %{
        // Stores int 0 by writing the low 32 bits of r12 instead of an immediate.
        // Only selectable with compressed oops enabled and a NULL heap base; per the
        // format text, r12 (the heap-base register) then holds zero.
 6206   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6207   match(Set mem (StoreI mem zero));
 6208 
 6209   ins_cost(125); // XXX
 6210   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 6211   ins_encode %{
 6212     __ movl($mem$$Address, r12);
 6213   %}
 6214   ins_pipe(ialu_mem_reg);
 6215 %}
 6216 
 6217 instruct storeImmI(memory mem, immI src)
 6218 %{
        // Stores an int immediate directly to memory with movl.
        // Costed above storeImmI0 (150 vs. 125), so the r12 form is preferred for
        // zero when its predicate holds.
 6219   match(Set mem (StoreI mem src));
 6220 
 6221   ins_cost(150);
 6222   format %{ "movl    $mem, $src\t# int" %}
 6223   ins_encode %{
 6224     __ movl($mem$$Address, $src$$constant);
 6225   %}
 6226   ins_pipe(ialu_mem_imm);
 6227 %}
 6228 
 6229 // Store Long Immediate
 6230 instruct storeImmL0(memory mem, immL0 zero)
 6231 %{
        // Stores long 0 by writing r12 instead of an immediate.
        // Only selectable with compressed oops enabled and a NULL heap base; per the
        // format text, r12 (the heap-base register) then holds zero.
 6232   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6233   match(Set mem (StoreL mem zero));
 6234 
 6235   ins_cost(125); // XXX
 6236   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 6237   ins_encode %{
 6238     __ movq($mem$$Address, r12);
 6239   %}
 6240   ins_pipe(ialu_mem_reg);
 6241 %}
 6242 
 6243 instruct storeImmL(memory mem, immL32 src)
 6244 %{
        // Stores a long constant matching immL32 directly to memory with movq and
        // an immediate operand.
 6245   match(Set mem (StoreL mem src));
 6246 
 6247   ins_cost(150);
 6248   format %{ "movq    $mem, $src\t# long" %}
 6249   ins_encode %{
 6250     __ movq($mem$$Address, $src$$constant);
 6251   %}
 6252   ins_pipe(ialu_mem_imm);
 6253 %}
 6254 
 6255 // Store Short/Char Immediate
 6256 instruct storeImmC0(memory mem, immI_0 zero)
 6257 %{
        // Stores a 16-bit zero by writing the low 16 bits of r12 (movw).
        // Only selectable with compressed oops enabled and a NULL heap base; per the
        // format text, r12 (the heap-base register) then holds zero.
 6258   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6259   match(Set mem (StoreC mem zero));
 6260 
 6261   ins_cost(125); // XXX
 6262   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 6263   ins_encode %{
 6264     __ movw($mem$$Address, r12);
 6265   %}
 6266   ins_pipe(ialu_mem_reg);
 6267 %}
 6268 
 6269 instruct storeImmI16(memory mem, immI16 src)
 6270 %{
        // Stores a 16-bit immediate directly to memory with movw (StoreC node).
        // Gated on the UseStoreImmI16 flag; when it is off, this rule is not
        // selectable and other StoreC rules must be used.
 6271   predicate(UseStoreImmI16);
 6272   match(Set mem (StoreC mem src));
 6273 
 6274   ins_cost(150);
 6275   format %{ "movw    $mem, $src\t# short/char" %}
 6276   ins_encode %{
 6277     __ movw($mem$$Address, $src$$constant);
 6278   %}
 6279   ins_pipe(ialu_mem_imm);
 6280 %}
 6281 
 6282 // Store Byte Immediate
 6283 instruct storeImmB0(memory mem, immI_0 zero)
 6284 %{
        // Stores a zero byte by writing the low 8 bits of r12 (movb).
        // Only selectable with compressed oops enabled and a NULL heap base; per the
        // format text, r12 (the heap-base register) then holds zero.
        // The format comment says "byte" to match the StoreB node and movb store.
 6285   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6286   match(Set mem (StoreB mem zero));
 6287 
 6288   ins_cost(125); // XXX
 6289   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 6290   ins_encode %{
 6291     __ movb($mem$$Address, r12);
 6292   %}
 6293   ins_pipe(ialu_mem_reg);
 6294 %}
 6295 
 6296 instruct storeImmB(memory mem, immI8 src)
 6297 %{
        // Stores a byte immediate (immI8) directly to memory with movb.
 6298   match(Set mem (StoreB mem src));
 6299 
 6300   ins_cost(150); // XXX
 6301   format %{ "movb    $mem, $src\t# byte" %}
 6302   ins_encode %{
 6303     __ movb($mem$$Address, $src$$constant);
 6304   %}
 6305   ins_pipe(ialu_mem_imm);
 6306 %}
 6307 
 6308 // Store CMS card-mark Immediate
 6309 instruct storeImmCM0_reg(memory mem, immI_0 zero)
 6310 %{
        // Card-mark store of a zero byte (StoreCM node) using the low 8 bits of r12.
        // Only selectable with compressed oops enabled and a NULL heap base; per the
        // format text, r12 (the heap-base register) then holds zero.
 6311   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6312   match(Set mem (StoreCM mem zero));
 6313 
 6314   ins_cost(125); // XXX
 6315   format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
 6316   ins_encode %{
 6317     __ movb($mem$$Address, r12);
 6318   %}
 6319   ins_pipe(ialu_mem_reg);
 6320 %}
 6321 
 6322 instruct storeImmCM0(memory mem, immI_0 src)
 6323 %{
        // Card-mark store of a zero byte (StoreCM node) from an immediate.
        // Has no predicate, so it is the general form; storeImmCM0_reg is cheaper
        // (125 vs. 150) when its predicate holds.
 6324   match(Set mem (StoreCM mem src));
 6325 
 6326   ins_cost(150); // XXX
 6327   format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
 6328   ins_encode %{
 6329     __ movb($mem$$Address, $src$$constant);
 6330   %}
 6331   ins_pipe(ialu_mem_imm);
 6332 %}
 6333 
 6334 // Store Float
 6335 instruct storeF(memory mem, regF src)
 6336 %{
        // Stores a float from XMM register src to memory via movflt (StoreF node).
 6337   match(Set mem (StoreF mem src));
 6338 
 6339   ins_cost(95); // XXX
 6340   format %{ "movss   $mem, $src\t# float" %}
 6341   ins_encode %{
 6342     __ movflt($mem$$Address, $src$$XMMRegister);
 6343   %}
 6344   ins_pipe(pipe_slow); // XXX
 6345 %}
 6346 
 6347 // Store immediate Float value (it is faster than store from XMM register)
 6348 instruct storeF0(memory mem, immF0 zero)
 6349 %{
        // Stores float 0.0 as an integer store of the low 32 bits of r12.
        // Only selectable with compressed oops enabled and a NULL heap base; per the
        // format text, r12 (the heap-base register) then holds zero.
        // Lowest-cost StoreF rule in this group (25 vs. 50 for storeF_imm, 95 for storeF).
 6350   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6351   match(Set mem (StoreF mem zero));
 6352 
 6353   ins_cost(25); // XXX
 6354   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 6355   ins_encode %{
 6356     __ movl($mem$$Address, r12);
 6357   %}
 6358   ins_pipe(ialu_mem_reg);
 6359 %}
 6360 
 6361 instruct storeF_imm(memory mem, immF src)
 6362 %{
        // Stores a float constant as a 32-bit integer immediate: jint_cast converts
        // the float constant to the jint value handed to movl (presumably a bit-level
        // reinterpretation -- confirm against jint_cast's definition).
 6363   match(Set mem (StoreF mem src));
 6364 
 6365   ins_cost(50);
 6366   format %{ "movl    $mem, $src\t# float" %}
 6367   ins_encode %{
 6368     __ movl($mem$$Address, jint_cast($src$$constant));
 6369   %}
 6370   ins_pipe(ialu_mem_imm);
 6371 %}
 6372 
 6373 // Store Double
 6374 instruct storeD(memory mem, regD src)
 6375 %{
        // Stores a double from XMM register src to memory via movdbl (StoreD node).
 6376   match(Set mem (StoreD mem src));
 6377 
 6378   ins_cost(95); // XXX
 6379   format %{ "movsd   $mem, $src\t# double" %}
 6380   ins_encode %{
 6381     __ movdbl($mem$$Address, $src$$XMMRegister);
 6382   %}
 6383   ins_pipe(pipe_slow); // XXX
 6384 %}
 6385 
 6386 // Store immediate double 0.0 (it is faster than store from XMM register)
 6387 instruct storeD0_imm(memory mem, immD0 src)
 6388 %{
        // Stores double 0.0 as a 64-bit integer immediate store (movq).
        // The predicate is the exact negation of storeD0's, so this rule covers the
        // configurations where the r12-based store is not selectable.
 6389   predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
 6390   match(Set mem (StoreD mem src));
 6391 
 6392   ins_cost(50);
 6393   format %{ "movq    $mem, $src\t# double 0." %}
 6394   ins_encode %{
 6395     __ movq($mem$$Address, $src$$constant);
 6396   %}
 6397   ins_pipe(ialu_mem_imm);
 6398 %}
 6399 
 6400 instruct storeD0(memory mem, immD0 zero)
 6401 %{
        // Stores double 0.0 as an integer store of r12 (movq).
        // Only selectable with compressed oops enabled and a NULL heap base; per the
        // format text, r12 (the heap-base register) then holds zero.
 6402   predicate(UseCompressedOops && (CompressedOops::base() == NULL));
 6403   match(Set mem (StoreD mem zero));
 6404 
 6405   ins_cost(25); // XXX
 6406   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 6407   ins_encode %{
 6408     __ movq($mem$$Address, r12);
 6409   %}
 6410   ins_pipe(ialu_mem_reg);
 6411 %}
 6412 
 6413 instruct storeSSI(stackSlotI dst, rRegI src)
 6414 %{
        // Stores int register src to a stack slot.
        // Encoded from raw pieces: REX prefix (REX_reg_mem), primary opcode 0x89,
        // then the reg/mem operand bytes. Note the (src, dst) order: the register
        // operand comes first in the enc-class arguments.
 6415   match(Set dst src);
 6416 
 6417   ins_cost(100);
 6418   format %{ "movl    $dst, $src\t# int stk" %}
 6419   opcode(0x89);
 6420   ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
 6421   ins_pipe( ialu_mem_reg );
 6422 %}
 6423 
 6424 instruct storeSSL(stackSlotL dst, rRegL src)
 6425 %{
        // Stores long register src to a stack slot.
        // Same opcode 0x89 as storeSSI, but with the wide REX prefix encoding
        // (REX_reg_mem_wide) for the 64-bit form.
 6426   match(Set dst src);
 6427 
 6428   ins_cost(100);
 6429   format %{ "movq    $dst, $src\t# long stk" %}
 6430   opcode(0x89);
 6431   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
 6432   ins_pipe(ialu_mem_reg);
 6433 %}
 6434 
 6435 instruct storeSSP(stackSlotP dst, rRegP src)
 6436 %{
        // Stores pointer register src to a stack slot.
        // Uses the same encoding as storeSSL: wide REX prefix, opcode 0x89, reg/mem.
 6437   match(Set dst src);
 6438 
 6439   ins_cost(100);
 6440   format %{ "movq    $dst, $src\t# ptr stk" %}
 6441   opcode(0x89);
 6442   ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
 6443   ins_pipe(ialu_mem_reg);
 6444 %}
 6445 
 6446 instruct storeSSF(stackSlotF dst, regF src)
 6447 %{
        // Stores a float from XMM register src to a stack slot.
        // The slot is addressed as rsp + the operand's displacement.
 6448   match(Set dst src);
 6449 
 6450   ins_cost(95); // XXX
 6451   format %{ "movss   $dst, $src\t# float stk" %}
 6452   ins_encode %{
 6453     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 6454   %}
 6455   ins_pipe(pipe_slow); // XXX
 6456 %}
 6457 
 6458 instruct storeSSD(stackSlotD dst, regD src)
 6459 %{
        // Stores a double from XMM register src to a stack slot.
        // The slot is addressed as rsp + the operand's displacement.
 6460   match(Set dst src);
 6461 
 6462   ins_cost(95); // XXX
 6463   format %{ "movsd   $dst, $src\t# double stk" %}
 6464   ins_encode %{
 6465     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 6466   %}
 6467   ins_pipe(pipe_slow); // XXX
 6468 %}
 6469 
 6470 instruct cacheWB(indirect addr)
 6471 %{
        // Cache line write-back for the CacheWB node; only selectable when the CPU
        // reports data cache line flush support.
 6472   predicate(VM_Version::supports_data_cache_line_flush());
 6473   match(CacheWB addr);
 6474 
 6475   ins_cost(100);
 6476   format %{"cache wb $addr" %}
 6477   ins_encode %{
          // The address must be a plain base register: no index register and a zero
          // displacement. Only the base is passed on to cache_wb.
 6478     assert($addr->index_position() < 0, "should be");
 6479     assert($addr$$disp == 0, "should be");
 6480     __ cache_wb(Address($addr$$base$$Register, 0));
 6481   %}
 6482   ins_pipe(pipe_slow); // XXX
 6483 %}
 6484 
 6485 instruct cacheWBPreSync()
 6486 %{
        // Synchronization emitted for the CacheWBPreSync node; only selectable when
        // the CPU reports data cache line flush support.
 6487   predicate(VM_Version::supports_data_cache_line_flush());
 6488   match(CacheWBPreSync);
 6489 
 6490   ins_cost(100);
 6491   format %{"cache wb presync" %}
 6492   ins_encode %{
          // true selects the "pre" flavour of cache_wbsync.
 6493     __ cache_wbsync(true);
 6494   %}
 6495   ins_pipe(pipe_slow); // XXX
 6496 %}
 6497 
 6498 instruct cacheWBPostSync()
 6499 %{
        // Synchronization emitted for the CacheWBPostSync node; only selectable when
        // the CPU reports data cache line flush support.
 6500   predicate(VM_Version::supports_data_cache_line_flush());
 6501   match(CacheWBPostSync);
 6502 
 6503   ins_cost(100);
 6504   format %{"cache wb postsync" %}
 6505   ins_encode %{
          // false selects the "post" flavour of cache_wbsync.
 6506     __ cache_wbsync(false);
 6507   %}
 6508   ins_pipe(pipe_slow); // XXX
 6509 %}
 6510 
 6511 //----------BSWAP Instructions-------------------------------------------------
 6512 instruct bytes_reverse_int(rRegI dst) %{
        // Reverses the byte order of an int in place (ReverseBytesI) with bswapl.
        // dst is both the input and the result register.
 6513   match(Set dst (ReverseBytesI dst));
 6514 
 6515   format %{ "bswapl  $dst" %}
 6516   ins_encode %{
 6517     __ bswapl($dst$$Register);
 6518   %}
 6519   ins_pipe( ialu_reg );
 6520 %}
 6521 
 6522 instruct bytes_reverse_long(rRegL dst) %{
        // Reverses the byte order of a long in place (ReverseBytesL) with bswapq.
        // dst is both the input and the result register.
 6523   match(Set dst (ReverseBytesL dst));
 6524 
 6525   format %{ "bswapq  $dst" %}
 6526   ins_encode %{
 6527     __ bswapq($dst$$Register);
 6528   %}
 6529   ins_pipe( ialu_reg);
 6530 %}
 6531 
 6532 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
        // Reverses the two bytes of an unsigned 16-bit value in place (ReverseBytesUS).
        // bswapl reverses all four bytes, leaving the swapped halfword in the upper
        // 16 bits; the logical right shift brings it down with zero fill.
 6533   match(Set dst (ReverseBytesUS dst));
 6534   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6535 
 6536   format %{ "bswapl  $dst\n\t"
 6537             "shrl    $dst,16\n\t" %}
 6538   ins_encode %{
 6539     __ bswapl($dst$$Register);
 6540     __ shrl($dst$$Register, 16);
 6541   %}
 6542   ins_pipe( ialu_reg );
 6543 %}
 6544 
 6545 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
        // Reverses the two bytes of a signed 16-bit value in place (ReverseBytesS).
        // bswapl reverses all four bytes, leaving the swapped halfword in the upper
        // 16 bits; the arithmetic right shift brings it down with sign extension.
        // The format prints "sarl" to match the instruction actually emitted.
 6546   match(Set dst (ReverseBytesS dst));
 6547   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6548 
 6549   format %{ "bswapl  $dst\n\t"
 6550             "sarl    $dst,16\n\t" %}
 6551   ins_encode %{
 6552     __ bswapl($dst$$Register);
 6553     __ sarl($dst$$Register, 16);
 6554   %}
 6555   ins_pipe( ialu_reg );
 6556 %}
 6557 
 6558 //---------- Zeros Count Instructions ------------------------------------------
 6559 
 6560 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Counts leading zero bits of an int with a single lzcntl.
        // Selected when UseCountLeadingZerosInstruction is set; otherwise the bsr
        // fallback (countLeadingZerosI_bsr) is used.
 6561   predicate(UseCountLeadingZerosInstruction);
 6562   match(Set dst (CountLeadingZerosI src));
 6563   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6564 
 6565   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 6566   ins_encode %{
 6567     __ lzcntl($dst$$Register, $src$$Register);
 6568   %}
 6569   ins_pipe(ialu_reg);
 6570 %}
 6571 
 6572 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form of countLeadingZerosI: matches CountLeadingZerosI
        // applied to a LoadI, so the load is folded into lzcntl's memory operand.
 6573   predicate(UseCountLeadingZerosInstruction);
 6574   match(Set dst (CountLeadingZerosI (LoadI src)));
 6575   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6576   ins_cost(175);
 6577   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 6578   ins_encode %{
 6579     __ lzcntl($dst$$Register, $src$$Address);
 6580   %}
 6581   ins_pipe(ialu_reg_mem);
 6582 %}
 6583 
 6584 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Fallback leading-zero count for int, used when
        // UseCountLeadingZerosInstruction is off. Computes (BitsPerInt - 1) - bsr(src),
        // with an explicit fix-up for the case where bsrl reports a zero source.
 6585   predicate(!UseCountLeadingZerosInstruction);
 6586   match(Set dst (CountLeadingZerosI src));
 6587   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6588 
 6589   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 6590             "jnz     skip\n\t"
 6591             "movl    $dst, -1\n"
 6592       "skip:\n\t"
 6593             "negl    $dst\n\t"
 6594             "addl    $dst, 31" %}
 6595   ins_encode %{
 6596     Register Rdst = $dst$$Register;
 6597     Register Rsrc = $src$$Register;
 6598     Label skip;
          // bsrl yields the index of the highest set bit; the branch is taken when
          // the source was non-zero (per x86 BSR flag semantics).
 6599     __ bsrl(Rdst, Rsrc);
 6600     __ jccb(Assembler::notZero, skip);
          // Zero source: force -1 so the arithmetic below produces BitsPerInt.
 6601     __ movl(Rdst, -1);
 6602     __ bind(skip);
          // dst = (BitsPerInt - 1) - dst
 6603     __ negl(Rdst);
 6604     __ addl(Rdst, BitsPerInt - 1);
 6605   %}
 6606   ins_pipe(ialu_reg);
 6607 %}
 6608 
 6609 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Counts leading zero bits of a long with a single lzcntq; the result goes
        // to an int register. Selected when UseCountLeadingZerosInstruction is set.
 6610   predicate(UseCountLeadingZerosInstruction);
 6611   match(Set dst (CountLeadingZerosL src));
 6612   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6613 
 6614   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 6615   ins_encode %{
 6616     __ lzcntq($dst$$Register, $src$$Register);
 6617   %}
 6618   ins_pipe(ialu_reg);
 6619 %}
 6620 
 6621 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form of countLeadingZerosL: matches CountLeadingZerosL
        // applied to a LoadL, so the load is folded into lzcntq's memory operand.
 6622   predicate(UseCountLeadingZerosInstruction);
 6623   match(Set dst (CountLeadingZerosL (LoadL src)));
 6624   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6625   ins_cost(175);
 6626   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 6627   ins_encode %{
 6628     __ lzcntq($dst$$Register, $src$$Address);
 6629   %}
 6630   ins_pipe(ialu_reg_mem);
 6631 %}
 6632 
 6633 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Fallback leading-zero count for long, used when
        // UseCountLeadingZerosInstruction is off. Computes (BitsPerLong - 1) - bsr(src),
        // with an explicit fix-up for the case where bsrq reports a zero source.
 6634   predicate(!UseCountLeadingZerosInstruction);
 6635   match(Set dst (CountLeadingZerosL src));
 6636   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6637 
 6638   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 6639             "jnz     skip\n\t"
 6640             "movl    $dst, -1\n"
 6641       "skip:\n\t"
 6642             "negl    $dst\n\t"
 6643             "addl    $dst, 63" %}
 6644   ins_encode %{
 6645     Register Rdst = $dst$$Register;
 6646     Register Rsrc = $src$$Register;
 6647     Label skip;
          // bsrq yields the index of the highest set bit; the branch is taken when
          // the source was non-zero (per x86 BSR flag semantics).
 6648     __ bsrq(Rdst, Rsrc);
 6649     __ jccb(Assembler::notZero, skip);
          // Zero source: force -1 so the arithmetic below produces BitsPerLong.
 6650     __ movl(Rdst, -1);
 6651     __ bind(skip);
          // dst = (BitsPerLong - 1) - dst; 32-bit ops suffice since the result is an int.
 6652     __ negl(Rdst);
 6653     __ addl(Rdst, BitsPerLong - 1);
 6654   %}
 6655   ins_pipe(ialu_reg);
 6656 %}
 6657 
 6658 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Counts trailing zero bits of an int with a single tzcntl.
        // Selected when UseCountTrailingZerosInstruction is set; otherwise the bsf
        // fallback (countTrailingZerosI_bsf) is used.
 6659   predicate(UseCountTrailingZerosInstruction);
 6660   match(Set dst (CountTrailingZerosI src));
 6661   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6662 
 6663   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 6664   ins_encode %{
 6665     __ tzcntl($dst$$Register, $src$$Register);
 6666   %}
 6667   ins_pipe(ialu_reg);
 6668 %}
 6669 
 6670 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form of countTrailingZerosI: matches CountTrailingZerosI
        // applied to a LoadI, so the load is folded into tzcntl's memory operand.
 6671   predicate(UseCountTrailingZerosInstruction);
 6672   match(Set dst (CountTrailingZerosI (LoadI src)));
 6673   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6674   ins_cost(175);
 6675   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 6676   ins_encode %{
 6677     __ tzcntl($dst$$Register, $src$$Address);
 6678   %}
 6679   ins_pipe(ialu_reg_mem);
 6680 %}
 6681 
 6682 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Fallback trailing-zero count for int, used when
        // UseCountTrailingZerosInstruction is off: bsfl gives the index of the
        // lowest set bit, and a zero source is patched to BitsPerInt.
 6683   predicate(!UseCountTrailingZerosInstruction);
 6684   match(Set dst (CountTrailingZerosI src));
 6685   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6686 
 6687   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 6688             "jnz     done\n\t"
 6689             "movl    $dst, 32\n"
 6690       "done:" %}
 6691   ins_encode %{
 6692     Register Rdst = $dst$$Register;
 6693     Label done;
 6694     __ bsfl(Rdst, $src$$Register);
          // Branch taken when the source was non-zero (per x86 BSF flag semantics);
          // otherwise fall through and load the "all zeros" answer.
 6695     __ jccb(Assembler::notZero, done);
 6696     __ movl(Rdst, BitsPerInt);
 6697     __ bind(done);
 6698   %}
 6699   ins_pipe(ialu_reg);
 6700 %}
 6701 
 6702 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Counts trailing zero bits of a long with a single tzcntq; the result goes
        // to an int register. Selected when UseCountTrailingZerosInstruction is set.
 6703   predicate(UseCountTrailingZerosInstruction);
 6704   match(Set dst (CountTrailingZerosL src));
 6705   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6706 
 6707   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 6708   ins_encode %{
 6709     __ tzcntq($dst$$Register, $src$$Register);
 6710   %}
 6711   ins_pipe(ialu_reg);
 6712 %}
 6713 
 6714 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form of countTrailingZerosL: matches CountTrailingZerosL
        // applied to a LoadL, so the load is folded into tzcntq's memory operand.
 6715   predicate(UseCountTrailingZerosInstruction);
 6716   match(Set dst (CountTrailingZerosL (LoadL src)));
 6717   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6718   ins_cost(175);
 6719   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 6720   ins_encode %{
 6721     __ tzcntq($dst$$Register, $src$$Address);
 6722   %}
 6723   ins_pipe(ialu_reg_mem);
 6724 %}
 6725 
 6726 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Fallback trailing-zero count for long, used when
        // UseCountTrailingZerosInstruction is off: bsfq gives the index of the
        // lowest set bit, and a zero source is patched to BitsPerLong.
 6727   predicate(!UseCountTrailingZerosInstruction);
 6728   match(Set dst (CountTrailingZerosL src));
 6729   effect(KILL cr); // the flags register is declared clobbered by this instruction
 6730 
 6731   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 6732             "jnz     done\n\t"
 6733             "movl    $dst, 64\n"
 6734       "done:" %}
 6735   ins_encode %{
 6736     Register Rdst = $dst$$Register;
 6737     Label done;
 6738     __ bsfq(Rdst, $src$$Register);
          // Branch taken when the source was non-zero (per x86 BSF flag semantics);
          // otherwise fall through and load the "all zeros" answer.
 6739     __ jccb(Assembler::notZero, done);
 6740     __ movl(Rdst, BitsPerLong);
 6741     __ bind(done);
 6742   %}
 6743   ins_pipe(ialu_reg);
 6744 %}
 6745 
 6746 //--------------- Reverse Operation Instructions ----------------
 6747 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
        // ReverseI when VM_Version::supports_gfni() is false. Calls the macro
        // assembler's reverseI with xnoreg for both XMM arguments and one
        // general-purpose temp. dst is declared TEMP as well as being the result.
 6748   predicate(!VM_Version::supports_gfni());
 6749   match(Set dst (ReverseI src));
 6750   effect(TEMP dst, TEMP rtmp, KILL cr);
 6751   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 6752   ins_encode %{
 6753     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 6754   %}
 6755   ins_pipe( ialu_reg );
 6756 %}
 6757 
 6758 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
        // ReverseI when VM_Version::supports_gfni() is true. Same reverseI call as
        // the non-GFNI rule, but with two XMM temps supplied; the general-purpose
        // temp is a long register class here (rRegL).
 6759   predicate(VM_Version::supports_gfni());
 6760   match(Set dst (ReverseI src));
 6761   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6762   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6763   ins_encode %{
 6764     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 6765   %}
 6766   ins_pipe( ialu_reg );
 6767 %}
 6768 
 6769 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
        // ReverseL when VM_Version::supports_gfni() is false. Calls reverseL with
        // xnoreg for both XMM arguments and two general-purpose temps.
 6770   predicate(!VM_Version::supports_gfni());
 6771   match(Set dst (ReverseL src));
 6772   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 6773   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 6774   ins_encode %{
 6775     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 6776   %}
 6777   ins_pipe( ialu_reg );
 6778 %}
 6779 
 6780 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
        // ReverseL when VM_Version::supports_gfni() is true. Calls reverseL with two
        // XMM temps and one general-purpose temp; the second general-purpose
        // argument is passed as noreg.
 6781   predicate(VM_Version::supports_gfni());
 6782   match(Set dst (ReverseL src));
 6783   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 6784   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 6785   ins_encode %{
 6786     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 6787   %}
 6788   ins_pipe( ialu_reg );
 6789 %}
 6790 
 6791 //---------- Population Count Instructions -------------------------------------
 6792 
 6793 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // PopCountI of an int register, available only when UsePopCountInstruction
        // is set. Emits popcntl; the flags register is declared killed.
 6794   predicate(UsePopCountInstruction);
 6795   match(Set dst (PopCountI src));
 6796   effect(KILL cr);
 6797
 6798   format %{ "popcnt  $dst, $src" %}
 6799   ins_encode %{
 6800     __ popcntl($dst$$Register, $src$$Register);
 6801   %}
 6802   ins_pipe(ialu_reg);
 6803 %}
 6804 
 6805 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
        // PopCountI with the LoadI folded in: the load's address is passed straight
        // to popcntl. Available only when UsePopCountInstruction is set; the flags
        // register is declared killed.
 6806   predicate(UsePopCountInstruction);
 6807   match(Set dst (PopCountI (LoadI mem)));
 6808   effect(KILL cr);
 6809
 6810   format %{ "popcnt  $dst, $mem" %}
 6811   ins_encode %{
 6812     __ popcntl($dst$$Register, $mem$$Address);
 6813   %}
        // Memory-operand form, so use the register/memory pipeline class, as the
        // *_mem count-trailing-zeros rules in this file do.
 6814   ins_pipe(ialu_reg_mem);
 6815 %}
 6816 
 6817 // Note: Long.bitCount(long) returns an int.
 6818 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // PopCountL of a long register into an int register class, available only
        // when UsePopCountInstruction is set. Emits popcntq; flags declared killed.
 6819   predicate(UsePopCountInstruction);
 6820   match(Set dst (PopCountL src));
 6821   effect(KILL cr);
 6822
 6823   format %{ "popcnt  $dst, $src" %}
 6824   ins_encode %{
 6825     __ popcntq($dst$$Register, $src$$Register);
 6826   %}
 6827   ins_pipe(ialu_reg);
 6828 %}
 6829 
 6830 // Note: Long.bitCount(long) returns an int.
 6831 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
        // PopCountL with the LoadL folded in: the load's address is passed straight
        // to popcntq. Available only when UsePopCountInstruction is set; the flags
        // register is declared killed.
 6832   predicate(UsePopCountInstruction);
 6833   match(Set dst (PopCountL (LoadL mem)));
 6834   effect(KILL cr);
 6835
 6836   format %{ "popcnt  $dst, $mem" %}
 6837   ins_encode %{
 6838     __ popcntq($dst$$Register, $mem$$Address);
 6839   %}
        // Memory-operand form, so use the register/memory pipeline class, as the
        // *_mem count-trailing-zeros rules in this file do.
 6840   ins_pipe(ialu_reg_mem);
 6841 %}
 6842 
 6843 
 6844 //----------MemBar Instructions-----------------------------------------------
 6845 // Memory barrier flavors
 6846 
 6847 instruct membar_acquire()
 6848 %{
        // Matches both MemBarAcquire and LoadFence. Cost 0, size(0) and an empty
        // ins_encode(): no instruction bytes are emitted for these nodes.
 6849   match(MemBarAcquire);
 6850   match(LoadFence);
 6851   ins_cost(0);
 6852
 6853   size(0);
 6854   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6855   ins_encode();
 6856   ins_pipe(empty);
 6857 %}
 6858 
 6859 instruct membar_acquire_lock()
 6860 %{
        // Matches MemBarAcquireLock with an empty, zero-size encoding. Per the
        // format text, the preceding CMPXCHG in FastLock makes a barrier redundant.
 6861   match(MemBarAcquireLock);
 6862   ins_cost(0);
 6863
 6864   size(0);
 6865   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6866   ins_encode();
 6867   ins_pipe(empty);
 6868 %}
 6869 
 6870 instruct membar_release()
 6871 %{
        // Matches both MemBarRelease and StoreFence. Cost 0, size(0) and an empty
        // ins_encode(): no instruction bytes are emitted for these nodes.
 6872   match(MemBarRelease);
 6873   match(StoreFence);
 6874   ins_cost(0);
 6875
 6876   size(0);
 6877   format %{ "MEMBAR-release ! (empty encoding)" %}
 6878   ins_encode();
 6879   ins_pipe(empty);
 6880 %}
 6881 
 6882 instruct membar_release_lock()
 6883 %{
        // Matches MemBarReleaseLock with an empty, zero-size encoding. Per the
        // format text, the FastUnlock that follows makes a barrier redundant.
 6884   match(MemBarReleaseLock);
 6885   ins_cost(0);
 6886
 6887   size(0);
 6888   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6889   ins_encode();
 6890   ins_pipe(empty);
 6891 %}
 6892 
 6893 instruct membar_volatile(rFlagsReg cr) %{
        // MemBarVolatile with a real encoding: asks the macro assembler for a
        // StoreLoad barrier. Cost 400 and the flags register is declared killed;
        // the format text shows the barrier as a locked add to [rsp].
 6894   match(MemBarVolatile);
 6895   effect(KILL cr);
 6896   ins_cost(400);
 6897
 6898   format %{
 6899     $$template
 6900     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 6901   %}
 6902   ins_encode %{
 6903     __ membar(Assembler::StoreLoad);
 6904   %}
 6905   ins_pipe(pipe_slow);
 6906 %}
 6907 
 6908 instruct unnecessary_membar_volatile()
 6909 %{
        // Zero-cost, zero-size alternative for MemBarVolatile, eligible only when
        // Matcher::post_store_load_barrier(n) holds; nothing is emitted.
 6910   match(MemBarVolatile);
 6911   predicate(Matcher::post_store_load_barrier(n));
 6912   ins_cost(0);
 6913
 6914   size(0);
 6915   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6916   ins_encode();
 6917   ins_pipe(empty);
 6918 %}
 6919 
 6920 instruct membar_storestore() %{
        // Matches both MemBarStoreStore and StoreStoreFence. Cost 0, size(0) and an
        // empty ins_encode(): no instruction bytes are emitted.
 6921   match(MemBarStoreStore);
 6922   match(StoreStoreFence);
 6923   ins_cost(0);
 6924
 6925   size(0);
 6926   format %{ "MEMBAR-storestore (empty encoding)" %}
 6927   ins_encode( );
 6928   ins_pipe(empty);
 6929 %}
 6930 
 6931 //----------Move Instructions--------------------------------------------------
 6932 
 6933 instruct castX2P(rRegP dst, rRegL src)
 6934 %{
        // CastX2P (long -> pointer): a plain register copy.
 6935   match(Set dst (CastX2P src));
 6936
 6937   format %{ "movq    $dst, $src\t# long->ptr" %}
 6938   ins_encode %{
          // Emit nothing when both operands were assigned the same register encoding.
 6939     if ($dst$$reg != $src$$reg) {
 6940       __ movptr($dst$$Register, $src$$Register);
 6941     }
 6942   %}
 6943   ins_pipe(ialu_reg_reg); // XXX
 6944 %}
 6945 
 6946 instruct castN2X(rRegL dst, rRegN src)
 6947 %{
        // CastP2X whose input sits in a narrow-oop register class (rRegN): a plain
        // register copy into a long register.
        // NOTE(review): the format text says "ptr -> long" although src is rRegN --
        // confirm the wording is intentional.
 6948   match(Set dst (CastP2X src));
 6949
 6950   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6951   ins_encode %{
          // Emit nothing when both operands were assigned the same register encoding.
 6952     if ($dst$$reg != $src$$reg) {
 6953       __ movptr($dst$$Register, $src$$Register);
 6954     }
 6955   %}
 6956   ins_pipe(ialu_reg_reg); // XXX
 6957 %}
 6958 
 6959 instruct castP2X(rRegL dst, rRegP src)
 6960 %{
        // CastP2X (pointer -> long): a plain register copy.
 6961   match(Set dst (CastP2X src));
 6962
 6963   format %{ "movq    $dst, $src\t# ptr -> long" %}
 6964   ins_encode %{
          // Emit nothing when both operands were assigned the same register encoding.
 6965     if ($dst$$reg != $src$$reg) {
 6966       __ movptr($dst$$Register, $src$$Register);
 6967     }
 6968   %}
 6969   ins_pipe(ialu_reg_reg); // XXX
 6970 %}
 6971 
 6972 // Convert oop into int for vectors alignment masking
 6973 instruct convP2I(rRegI dst, rRegP src)
 6974 %{
        // Collapses the ConvL2I(CastP2X ptr) pair into a single movl. The move is
        // emitted unconditionally, even if dst and src share a register.
 6975   match(Set dst (ConvL2I (CastP2X src)));
 6976
 6977   format %{ "movl    $dst, $src\t# ptr -> int" %}
 6978   ins_encode %{
 6979     __ movl($dst$$Register, $src$$Register);
 6980   %}
 6981   ins_pipe(ialu_reg_reg); // XXX
 6982 %}
 6983 
 6984 // Convert compressed oop into int for vectors alignment masking
 6985 // in case of 32bit oops (heap < 4Gb).
 6986 instruct convN2I(rRegI dst, rRegN src)
 6987 %{
        // Collapses ConvL2I(CastP2X(DecodeN narrow)) into a single movl of the
        // narrow value. Only eligible when CompressedOops::shift() is 0.
 6988   predicate(CompressedOops::shift() == 0);
 6989   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6990
 6991   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 6992   ins_encode %{
 6993     __ movl($dst$$Register, $src$$Register);
 6994   %}
 6995   ins_pipe(ialu_reg_reg); // XXX
 6996 %}
 6997 
 6998 // Convert oop pointer into compressed form
 6999 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodeP for results whose pointer type is not TypePtr::NotNull. The oop
        // is first copied into dst (when the registers differ) and then encoded in
        // place. The flags register is declared killed.
 7000   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 7001   match(Set dst (EncodeP src));
 7002   effect(KILL cr);
 7003   format %{ "encode_heap_oop $dst,$src" %}
 7004   ins_encode %{
 7005     Register dst_reg = $dst$$Register;
 7006     Register src_reg = $src$$Register;
 7007     if (dst_reg != src_reg) {
 7008       __ movq(dst_reg, src_reg);
 7009     }
 7010     __ encode_heap_oop(dst_reg);
 7011   %}
 7012   ins_pipe(ialu_reg_long);
 7013 %}
 7014 
 7015 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodeP for results whose pointer type is TypePtr::NotNull; uses the
        // two-register encode_heap_oop_not_null helper. Flags declared killed.
 7016   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 7017   match(Set dst (EncodeP src));
 7018   effect(KILL cr);
 7019   format %{ "encode_heap_oop_not_null $dst,$src" %}
 7020   ins_encode %{
 7021     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 7022   %}
 7023   ins_pipe(ialu_reg_long);
 7024 %}
 7025 
 7026 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeN for results whose pointer type is neither TypePtr::NotNull nor
        // TypePtr::Constant. The narrow oop is copied into dst (when the registers
        // differ) and decoded in place. The flags register is declared killed.
 7027   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 7028             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 7029   match(Set dst (DecodeN src));
 7030   effect(KILL cr);
 7031   format %{ "decode_heap_oop $dst,$src" %}
 7032   ins_encode %{
 7033     Register dst_reg = $dst$$Register;
 7034     Register src_reg = $src$$Register;
 7035     if (dst_reg != src_reg) {
 7036       __ movq(dst_reg, src_reg);
 7037     }
 7038     __ decode_heap_oop(dst_reg);
 7039   %}
 7040   ins_pipe(ialu_reg_long);
 7041 %}
 7042 
 7043 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeN for results whose pointer type is TypePtr::NotNull or
        // TypePtr::Constant. Picks the one-register decode helper when dst and src
        // share a register, the two-register helper otherwise. Flags declared killed.
 7044   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 7045             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 7046   match(Set dst (DecodeN src));
 7047   effect(KILL cr);
 7048   format %{ "decode_heap_oop_not_null $dst,$src" %}
 7049   ins_encode %{
 7050     Register dst_reg = $dst$$Register;
 7051     Register src_reg = $src$$Register;
 7052     if (src_reg == dst_reg) {
 7053       __ decode_heap_oop_not_null(dst_reg);
 7054     } else {
 7055       __ decode_heap_oop_not_null(dst_reg, src_reg);
 7056     }
 7057   %}
 7058   ins_pipe(ialu_reg_long);
 7059 %}
 7060 
 7061 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodePKlass via encode_and_move_klass_not_null. dst is declared TEMP in
        // addition to being the result, and the flags register is declared killed.
 7062   match(Set dst (EncodePKlass src));
 7063   effect(TEMP dst, KILL cr);
 7064   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 7065   ins_encode %{
 7066     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7067   %}
 7068   ins_pipe(ialu_reg_long);
 7069 %}
 7070 
 7071 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeNKlass via decode_and_move_klass_not_null. dst is declared TEMP in
        // addition to being the result, and the flags register is declared killed.
 7072   match(Set dst (DecodeNKlass src));
 7073   effect(TEMP dst, KILL cr);
 7074   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 7075   ins_encode %{
 7076     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 7077   %}
 7078   ins_pipe(ialu_reg_long);
 7079 %}
 7080 
 7081 //----------Conditional Move---------------------------------------------------
 7082 // Jump
 7083 // dummy instruction for generating temp registers
 7084 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
        // Jump on (switch_val << shift). predicate(false) means the matcher can
        // never select this rule as written. dest is a TEMP that receives the
        // address taken from $constantaddress.
 7085   match(Jump (LShiftL switch_val shift));
 7086   ins_cost(350);
 7087   predicate(false);
 7088   effect(TEMP dest);
 7089
 7090   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7091             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 7092   ins_encode %{
 7093     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7094     // to do that and the compiler is using that register as one it can allocate.
 7095     // So we build it all by hand.
 7096     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 7097     // ArrayAddress dispatch(table, index);
          // Operand [dest + switch_val * scale]; dest itself is loaded by the lea below.
 7098     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 7099     __ lea($dest$$Register, $constantaddress);
 7100     __ jmp(dispatch);
 7101   %}
 7102   ins_pipe(pipe_jmp);
 7103 %}
 7104 
 7105 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
        // Jump on ((switch_val << shift) + offset): the shift becomes the address
        // scale and the offset the displacement of an indirect jmp. dest is a TEMP
        // that receives the address taken from $constantaddress.
 7106   match(Jump (AddL (LShiftL switch_val shift) offset));
 7107   ins_cost(350);
 7108   effect(TEMP dest);
 7109
 7110   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7111             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 7112   ins_encode %{
 7113     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7114     // to do that and the compiler is using that register as one it can allocate.
 7115     // So we build it all by hand.
 7116     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7117     // ArrayAddress dispatch(table, index);
          // Operand [dest + switch_val * scale + offset]; dest itself is loaded by the lea below.
 7118     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 7119     __ lea($dest$$Register, $constantaddress);
 7120     __ jmp(dispatch);
 7121   %}
 7122   ins_pipe(pipe_jmp);
 7123 %}
 7124 
 7125 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
        // Jump on a plain switch_val: an indirect jmp through
        // [dest + switch_val] with scale times_1. dest is a TEMP that receives the
        // address taken from $constantaddress.
 7126   match(Jump switch_val);
 7127   ins_cost(350);
 7128   effect(TEMP dest);
 7129
 7130   format %{ "leaq    $dest, [$constantaddress]\n\t"
 7131             "jmp     [$dest + $switch_val]\n\t" %}
 7132   ins_encode %{
 7133     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 7134     // to do that and the compiler is using that register as one it can allocate.
 7135     // So we build it all by hand.
 7136     // Address index(noreg, switch_reg, Address::times_1);
 7137     // ArrayAddress dispatch(table, index);
          // Operand [dest + switch_val]; dest itself is loaded by the lea below.
 7138     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 7139     __ lea($dest$$Register, $constantaddress);
 7140     __ jmp(dispatch);
 7141   %}
 7142   ins_pipe(pipe_jmp);
 7143 %}
 7144 
 7145 // Conditional move
 7146 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 7147 %{
        // CMoveI between two constants: src is the int constant 1 and the predicate
        // requires the other value input (dst) to be the constant 0. The choice is
        // materialised with a single setb on the negated condition.
 7148   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7149   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7150
 7151   ins_cost(100); // XXX
 7152   format %{ "setbn$cop $dst\t# signed, int" %}
 7153   ins_encode %{
 7154     Assembler::Condition set_cc = MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode));
 7155     __ setb(set_cc, $dst$$Register);
 7156   %}
 7157   ins_pipe(ialu_reg);
 7158 %}
 7159 
 7160 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 7161 %{
        // CMoveI, register source, signed-condition operands (cmpOp/rFlagsReg):
        // one cmovl that overwrites dst with src when the condition holds.
 7162   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7163
 7164   ins_cost(200); // XXX
 7165   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7166   ins_encode %{
 7167     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7168   %}
 7169   ins_pipe(pipe_cmov_reg);
 7170 %}
 7171 
 7172 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 7173 %{
        // Unsigned-condition twin of cmovI_imm_01: CMoveI between the constants 1
        // (src) and 0 (dst, checked by the predicate), materialised with a single
        // setb on the negated condition.
 7174   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7175   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7176
 7177   ins_cost(100); // XXX
 7178   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7179   ins_encode %{
 7180     Assembler::Condition set_cc = MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode));
 7181     __ setb(set_cc, $dst$$Register);
 7182   %}
 7183   ins_pipe(ialu_reg);
 7184 %}
 7185 
 7186 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
        // CMoveI, register source, unsigned-condition operands
        // (cmpOpU/rFlagsRegU): one cmovl on the operand's condition code.
 7187   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7188
 7189   ins_cost(200); // XXX
 7190   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7191   ins_encode %{
 7192     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7193   %}
 7194   ins_pipe(pipe_cmov_reg);
 7195 %}
 7196 
 7197 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7198 %{
        // cmpOpUCF/rFlagsRegUCF twin of cmovI_imm_01: CMoveI between the constants
        // 1 (src) and 0 (dst, checked by the predicate), materialised with a single
        // setb on the negated condition.
 7199   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 7200   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7201
 7202   ins_cost(100); // XXX
 7203   format %{ "setbn$cop $dst\t# unsigned, int" %}
 7204   ins_encode %{
 7205     Assembler::Condition set_cc = MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode));
 7206     __ setb(set_cc, $dst$$Register);
 7207   %}
 7208   ins_pipe(ialu_reg);
 7209 %}
 7210 
 7211 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // Same CMoveI tree as cmovI_regU but for cmpOpUCF/rFlagsRegUCF operands;
        // it has no encoding of its own and simply expands to cmovI_regU.
 7212   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7213   ins_cost(200);
 7214   expand %{
 7215     cmovI_regU(cop, cr, dst, src);
 7216   %}
 7217 %}
 7218 
 7219 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // CMoveI under a cmpOpUCF2 operand whose Bool test is BoolTest::ne. Two
        // cmovl instructions are emitted, so src reaches dst when either the
        // parity or the notEqual condition holds.
        // NOTE(review): parity presumably flags an unordered FP compare -- confirm
        // against the cmpOpUCF2 operand definition.
 7220   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7221   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 7222
 7223   ins_cost(200); // XXX
 7224   format %{ "cmovpl  $dst, $src\n\t"
 7225             "cmovnel $dst, $src" %}
 7226   ins_encode %{
 7227     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7228     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7229   %}
 7230   ins_pipe(pipe_cmov_reg);
 7231 %}
 7232 
 7233 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7234 // inputs of the CMove
 7235 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // BoolTest::eq counterpart of cmovI_regUCF2_ne: the match tree lists the
        // value inputs in the opposite order (Binary src dst), and the emitted
        // parity/notEqual cmovl pair is the same.
 7236   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7237   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 7238
 7239   ins_cost(200); // XXX
 7240   format %{ "cmovpl  $dst, $src\n\t"
 7241             "cmovnel $dst, $src" %}
 7242   ins_encode %{
 7243     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7244     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7245   %}
 7246   ins_pipe(pipe_cmov_reg);
 7247 %}
 7248 
 7249 // Conditional move
 7250 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
        // CMoveI whose second value input is a LoadI: the load's address becomes
        // the cmovl memory operand. Signed-condition operands; cost 250.
 7251   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7252
 7253   ins_cost(250); // XXX
 7254   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 7255   ins_encode %{
 7256     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7257   %}
 7258   ins_pipe(pipe_cmov_mem);
 7259 %}
 7260 
 7261 // Conditional move
 7262 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 7263 %{
        // Unsigned-condition twin of cmovI_mem: CMoveI with a folded LoadI,
        // emitted as cmovl with a memory operand. Cost 250.
 7264   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7265
 7266   ins_cost(250); // XXX
 7267   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 7268   ins_encode %{
 7269     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7270   %}
 7271   ins_pipe(pipe_cmov_mem);
 7272 %}
 7273 
 7274 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
        // Same tree as cmovI_memU but for cmpOpUCF/rFlagsRegUCF operands; it has
        // no encoding of its own and simply expands to cmovI_memU.
 7275   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 7276   ins_cost(250);
 7277   expand %{
 7278     cmovI_memU(cop, cr, dst, src);
 7279   %}
 7280 %}
 7281 
 7282 // Conditional move
 7283 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 7284 %{
        // CMoveN (narrow-oop registers), signed-condition operands: one cmovl.
 7285   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7286
 7287   ins_cost(200); // XXX
 7288   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 7289   ins_encode %{
 7290     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7291   %}
 7292   ins_pipe(pipe_cmov_reg);
 7293 %}
 7294 
 7295 // Conditional move
 7296 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 7297 %{
        // CMoveN (narrow-oop registers), unsigned-condition operands: one cmovl.
 7298   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7299
 7300   ins_cost(200); // XXX
 7301   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 7302   ins_encode %{
 7303     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7304   %}
 7305   ins_pipe(pipe_cmov_reg);
 7306 %}
 7307 
 7308 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // Same CMoveN tree as cmovN_regU but for cmpOpUCF/rFlagsRegUCF operands;
        // it has no encoding of its own and simply expands to cmovN_regU.
 7309   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7310   ins_cost(200);
 7311   expand %{
 7312     cmovN_regU(cop, cr, dst, src);
 7313   %}
 7314 %}
 7315 
 7316 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // CMoveN under a cmpOpUCF2 operand whose Bool test is BoolTest::ne. Two
        // cmovl instructions are emitted, so src reaches dst when either the
        // parity or the notEqual condition holds.
 7317   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7318   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 7319
 7320   ins_cost(200); // XXX
 7321   format %{ "cmovpl  $dst, $src\n\t"
 7322             "cmovnel $dst, $src" %}
 7323   ins_encode %{
 7324     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7325     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7326   %}
 7327   ins_pipe(pipe_cmov_reg);
 7328 %}
 7329 
 7330 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7331 // inputs of the CMove
 7332 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // BoolTest::eq counterpart of cmovN_regUCF2_ne: the match tree lists the
        // value inputs in the opposite order (Binary src dst), and the emitted
        // parity/notEqual cmovl pair is the same.
 7333   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7334   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 7335
 7336   ins_cost(200); // XXX
 7337   format %{ "cmovpl  $dst, $src\n\t"
 7338             "cmovnel $dst, $src" %}
 7339   ins_encode %{
 7340     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 7341     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 7342   %}
 7343   ins_pipe(pipe_cmov_reg);
 7344 %}
 7345 
 7346 // Conditional move
 7347 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 7348 %{
        // CMoveP (pointer registers), signed-condition operands: one cmovq.
 7349   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7350
 7351   ins_cost(200); // XXX
 7352   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 7353   ins_encode %{
 7354     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7355   %}
 7356   ins_pipe(pipe_cmov_reg);  // XXX
 7357 %}
 7358 
 7359 // Conditional move
 7360 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 7361 %{
        // CMoveP (pointer registers), unsigned-condition operands: one cmovq.
 7362   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7363
 7364   ins_cost(200); // XXX
 7365   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 7366   ins_encode %{
 7367     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7368   %}
 7369   ins_pipe(pipe_cmov_reg); // XXX
 7370 %}
 7371 
 7372 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // Same CMoveP tree as cmovP_regU but for cmpOpUCF/rFlagsRegUCF operands;
        // it has no encoding of its own and simply expands to cmovP_regU.
 7373   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7374   ins_cost(200);
 7375   expand %{
 7376     cmovP_regU(cop, cr, dst, src);
 7377   %}
 7378 %}
 7379 
 7380 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // CMoveP under a cmpOpUCF2 operand whose Bool test is BoolTest::ne. Two
        // cmovq instructions are emitted, so src reaches dst when either the
        // parity or the notEqual condition holds.
 7381   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7382   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 7383
 7384   ins_cost(200); // XXX
 7385   format %{ "cmovpq  $dst, $src\n\t"
 7386             "cmovneq $dst, $src" %}
 7387   ins_encode %{
 7388     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7389     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7390   %}
 7391   ins_pipe(pipe_cmov_reg);
 7392 %}
 7393 
 7394 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7395 // inputs of the CMove
 7396 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // BoolTest::eq counterpart of cmovP_regUCF2_ne: the match tree lists the
        // value inputs in the opposite order (Binary src dst), and the emitted
        // parity/notEqual cmovq pair is the same.
 7397   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7398   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 7399
 7400   ins_cost(200); // XXX
 7401   format %{ "cmovpq  $dst, $src\n\t"
 7402             "cmovneq $dst, $src" %}
 7403   ins_encode %{
 7404     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7405     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7406   %}
 7407   ins_pipe(pipe_cmov_reg);
 7408 %}
 7409 
 7410 // DISABLED: Requires the ADLC to emit a bottom_type call that
 7411 // correctly meets the two pointer arguments; one is an incoming
 7412 // register but the other is a memory operand.  ALSO appears to
 7413 // be buggy with implicit null checks.
 7414 //
 7415 //// Conditional move
 7416 //instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
 7417 //%{
 7418 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7419 //  ins_cost(250);
 7420 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7421 //  opcode(0x0F,0x40);
 7422 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7423 //  ins_pipe( pipe_cmov_mem );
 7424 //%}
 7425 //
 7426 //// Conditional move
 7427 //instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
 7428 //%{
 7429 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 7430 //  ins_cost(250);
 7431 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 7432 //  opcode(0x0F,0x40);
 7433 //  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
 7434 //  ins_pipe( pipe_cmov_mem );
 7435 //%}
 7436 
 7437 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 7438 %{
        // CMoveL between two long constants: src is the constant 1 and the
        // predicate requires the other value input (dst) to be the long constant 0.
        // The choice is materialised with a single setb on the negated condition.
        //
        // src is declared as the long constant operand immL1: both value inputs of
        // a CMoveL are longs (the predicate itself reads the sibling with
        // get_long()), so an int constant operand in this slot cannot match.
 7439   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7440   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7441
 7442   ins_cost(100); // XXX
 7443   format %{ "setbn$cop $dst\t# signed, long" %}
 7444   ins_encode %{
 7445     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7446     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7447   %}
 7448   ins_pipe(ialu_reg);
 7449 %}
 7450 
 7451 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 7452 %{
        // CMoveL, register source, signed-condition operands: one cmovq.
 7453   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7454
 7455   ins_cost(200); // XXX
 7456   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7457   ins_encode %{
 7458     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7459   %}
 7460   ins_pipe(pipe_cmov_reg);  // XXX
 7461 %}
 7462 
 7463 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 7464 %{
        // CMoveL whose second value input is a LoadL: the load's address becomes
        // the cmovq memory operand. Signed-condition operands.
        // NOTE(review): cost is 200 here, while the int memory forms use 250 --
        // confirm whether the difference is intended.
 7465   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7466
 7467   ins_cost(200); // XXX
 7468   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 7469   ins_encode %{
 7470     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7471   %}
 7472   ins_pipe(pipe_cmov_mem);  // XXX
 7473 %}
 7474 
 7475 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 7476 %{
        // Unsigned-condition form: CMoveL between the long constants 1 (src) and 0
        // (dst, checked by the predicate), materialised with a single setb on the
        // negated condition.
        //
        // src is declared as the long constant operand immL1: both value inputs of
        // a CMoveL are longs (the predicate itself reads the sibling with
        // get_long()), so an int constant operand in this slot cannot match.
 7477   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7478   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7479
 7480   ins_cost(100); // XXX
 7481   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7482   ins_encode %{
 7483     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7484     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7485   %}
 7486   ins_pipe(ialu_reg);
 7487 %}
 7488 
 7489 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 7490 %{
        // CMoveL, register source, unsigned-condition operands: one cmovq.
 7491   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7492
 7493   ins_cost(200); // XXX
 7494   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7495   ins_encode %{
 7496     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 7497   %}
 7498   ins_pipe(pipe_cmov_reg); // XXX
 7499 %}
 7500 
 7501 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 7502 %{
        // cmpOpUCF/rFlagsRegUCF form: CMoveL between the long constants 1 (src) and
        // 0 (dst, checked by the predicate), materialised with a single setb on the
        // negated condition.
        //
        // src is declared as the long constant operand immL1: both value inputs of
        // a CMoveL are longs (the predicate itself reads the sibling with
        // get_long()), so an int constant operand in this slot cannot match.
 7503   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 7504   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7505
 7506   ins_cost(100); // XXX
 7507   format %{ "setbn$cop $dst\t# unsigned, long" %}
 7508   ins_encode %{
 7509     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 7510     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 7511   %}
 7512   ins_pipe(ialu_reg);
 7513 %}
 7514 
 7515 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // Same CMoveL tree as cmovL_regU but for cmpOpUCF/rFlagsRegUCF operands;
        // it has no encoding of its own and simply expands to cmovL_regU.
 7516   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7517   ins_cost(200);
 7518   expand %{
 7519     cmovL_regU(cop, cr, dst, src);
 7520   %}
 7521 %}
 7522 
 7523 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // CMoveL under a cmpOpUCF2 operand whose Bool test is BoolTest::ne. Two
        // cmovq instructions are emitted, so src reaches dst when either the
        // parity or the notEqual condition holds.
 7524   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 7525   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7526
 7527   ins_cost(200); // XXX
 7528   format %{ "cmovpq  $dst, $src\n\t"
 7529             "cmovneq $dst, $src" %}
 7530   ins_encode %{
 7531     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7532     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7533   %}
 7534   ins_pipe(pipe_cmov_reg);
 7535 %}
 7536 
 7537 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 7538 // inputs of the CMove
 7539 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // BoolTest::eq counterpart of cmovL_regUCF2_ne: the match tree lists the
        // value inputs in the opposite order (Binary src dst), and the emitted
        // parity/notEqual cmovq pair is the same.
 7540   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 7541   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 7542
 7543   ins_cost(200); // XXX
 7544   format %{ "cmovpq  $dst, $src\n\t"
 7545             "cmovneq $dst, $src" %}
 7546   ins_encode %{
 7547     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 7548     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 7549   %}
 7550   ins_pipe(pipe_cmov_reg);
 7551 %}
 7552 
 7553 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 7554 %{
        // Unsigned-condition twin of cmovL_mem: CMoveL with a folded LoadL,
        // emitted as cmovq with a memory operand.
 7555   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7556
 7557   ins_cost(200); // XXX
 7558   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 7559   ins_encode %{
 7560     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 7561   %}
 7562   ins_pipe(pipe_cmov_mem); // XXX
 7563 %}
 7564 
 7565 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
        // Same tree as cmovL_memU but for cmpOpUCF/rFlagsRegUCF operands; it has
        // no encoding of its own and simply expands to cmovL_memU.
 7566   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 7567   ins_cost(200);
 7568   expand %{
 7569     cmovL_memU(cop, cr, dst, src);
 7570   %}
 7571 %}
 7572 
 7573 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 7574 %{
        // CMoveF on XMM registers, signed-condition operands. The encoding uses no
        // cmov instruction: it is a short branch around a movflt.
 7575   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7576
 7577   ins_cost(200); // XXX
 7578   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7579             "movss     $dst, $src\n"
 7580     "skip:" %}
 7581   ins_encode %{
 7582     Label Lkeep;
 7583     // The branch condition is the CMOV condition with its low bit flipped
          Assembler::Condition skip_cc = (Assembler::Condition)($cop$$cmpcode^1);
 7584     __ jccb(skip_cc, Lkeep);
 7585     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7586     __ bind(Lkeep);
 7587   %}
 7588   ins_pipe(pipe_slow);
 7589 %}
 7590 
 7591 // instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
 7592 // %{
 7593 //   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));
 7594 
 7595 //   ins_cost(200); // XXX
 7596 //   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 7597 //             "movss     $dst, $src\n"
 7598 //     "skip:" %}
 7599 //   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
 7600 //   ins_pipe(pipe_slow);
 7601 // %}
 7602 
 7603 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 7604 %{
        // CMoveF on XMM registers, unsigned-condition operands. The encoding uses
        // no cmov instruction: it is a short branch around a movflt.
 7605   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7606
 7607   ins_cost(200); // XXX
 7608   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 7609             "movss     $dst, $src\n"
 7610     "skip:" %}
 7611   ins_encode %{
 7612     Label Lkeep;
 7613     // The branch condition is the CMOV condition with its low bit flipped
          Assembler::Condition skip_cc = (Assembler::Condition)($cop$$cmpcode^1);
 7614     __ jccb(skip_cc, Lkeep);
 7615     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 7616     __ bind(Lkeep);
 7617   %}
 7618   ins_pipe(pipe_slow);
 7619 %}
 7620 
// Variant of cmovF_regU for the cmpOpUCF / rFlagsRegUCF operand classes.
// It has the same match rule and cost and simply expands to cmovF_regU.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
 7628 
 7629 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 7630 %{
 7631   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7632 
 7633   ins_cost(200); // XXX
 7634   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 7635             "movsd     $dst, $src\n"
 7636     "skip:" %}
 7637   ins_encode %{
 7638     Label Lskip;
 7639     // Invert sense of branch from sense of CMOV
 7640     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7641     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7642     __ bind(Lskip);
 7643   %}
 7644   ins_pipe(pipe_slow);
 7645 %}
 7646 
 7647 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 7648 %{
 7649   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7650 
 7651   ins_cost(200); // XXX
 7652   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 7653             "movsd     $dst, $src\n"
 7654     "skip:" %}
 7655   ins_encode %{
 7656     Label Lskip;
 7657     // Invert sense of branch from sense of CMOV
 7658     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 7659     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7660     __ bind(Lskip);
 7661   %}
 7662   ins_pipe(pipe_slow);
 7663 %}
 7664 
// Variant of cmovD_regU for the cmpOpUCF / rFlagsRegUCF operand classes.
// It has the same match rule and cost and simply expands to cmovD_regU.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
 7672 
 7673 //----------Arithmetic Instructions--------------------------------------------
 7674 //----------Addition Instructions----------------------------------------------
 7675 
 7676 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 7677 %{
 7678   match(Set dst (AddI dst src));
 7679   effect(KILL cr);
 7680 
 7681   format %{ "addl    $dst, $src\t# int" %}
 7682   ins_encode %{
 7683     __ addl($dst$$Register, $src$$Register);
 7684   %}
 7685   ins_pipe(ialu_reg_reg);
 7686 %}
 7687 
 7688 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 7689 %{
 7690   match(Set dst (AddI dst src));
 7691   effect(KILL cr);
 7692 
 7693   format %{ "addl    $dst, $src\t# int" %}
 7694   ins_encode %{
 7695     __ addl($dst$$Register, $src$$constant);
 7696   %}
 7697   ins_pipe( ialu_reg );
 7698 %}
 7699 
 7700 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 7701 %{
 7702   match(Set dst (AddI dst (LoadI src)));
 7703   effect(KILL cr);
 7704 
 7705   ins_cost(150); // XXX
 7706   format %{ "addl    $dst, $src\t# int" %}
 7707   ins_encode %{
 7708     __ addl($dst$$Register, $src$$Address);
 7709   %}
 7710   ins_pipe(ialu_reg_mem);
 7711 %}
 7712 
 7713 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 7714 %{
 7715   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7716   effect(KILL cr);
 7717 
 7718   ins_cost(150); // XXX
 7719   format %{ "addl    $dst, $src\t# int" %}
 7720   ins_encode %{
 7721     __ addl($dst$$Address, $src$$Register);
 7722   %}
 7723   ins_pipe(ialu_mem_reg);
 7724 %}
 7725 
 7726 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 7727 %{
 7728   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7729   effect(KILL cr);
 7730 
 7731   ins_cost(125); // XXX
 7732   format %{ "addl    $dst, $src\t# int" %}
 7733   ins_encode %{
 7734     __ addl($dst$$Address, $src$$constant);
 7735   %}
 7736   ins_pipe(ialu_mem_imm);
 7737 %}
 7738 
 7739 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 7740 %{
 7741   predicate(UseIncDec);
 7742   match(Set dst (AddI dst src));
 7743   effect(KILL cr);
 7744 
 7745   format %{ "incl    $dst\t# int" %}
 7746   ins_encode %{
 7747     __ incrementl($dst$$Register);
 7748   %}
 7749   ins_pipe(ialu_reg);
 7750 %}
 7751 
 7752 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 7753 %{
 7754   predicate(UseIncDec);
 7755   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7756   effect(KILL cr);
 7757 
 7758   ins_cost(125); // XXX
 7759   format %{ "incl    $dst\t# int" %}
 7760   ins_encode %{
 7761     __ incrementl($dst$$Address);
 7762   %}
 7763   ins_pipe(ialu_mem_imm);
 7764 %}
 7765 
 7766 // XXX why does that use AddI
 7767 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
 7768 %{
 7769   predicate(UseIncDec);
 7770   match(Set dst (AddI dst src));
 7771   effect(KILL cr);
 7772 
 7773   format %{ "decl    $dst\t# int" %}
 7774   ins_encode %{
 7775     __ decrementl($dst$$Register);
 7776   %}
 7777   ins_pipe(ialu_reg);
 7778 %}
 7779 
 7780 // XXX why does that use AddI
 7781 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
 7782 %{
 7783   predicate(UseIncDec);
 7784   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7785   effect(KILL cr);
 7786 
 7787   ins_cost(125); // XXX
 7788   format %{ "decl    $dst\t# int" %}
 7789   ins_encode %{
 7790     __ decrementl($dst$$Address);
 7791   %}
 7792   ins_pipe(ialu_mem_imm);
 7793 %}
 7794 
 7795 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
 7796 %{
 7797   predicate(VM_Version::supports_fast_2op_lea());
 7798   match(Set dst (AddI (LShiftI index scale) disp));
 7799 
 7800   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
 7801   ins_encode %{
 7802     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7803     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7804   %}
 7805   ins_pipe(ialu_reg_reg);
 7806 %}
 7807 
 7808 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
 7809 %{
 7810   predicate(VM_Version::supports_fast_3op_lea());
 7811   match(Set dst (AddI (AddI base index) disp));
 7812 
 7813   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
 7814   ins_encode %{
 7815     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7816   %}
 7817   ins_pipe(ialu_reg_reg);
 7818 %}
 7819 
 7820 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
 7821 %{
 7822   predicate(VM_Version::supports_fast_2op_lea());
 7823   match(Set dst (AddI base (LShiftI index scale)));
 7824 
 7825   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
 7826   ins_encode %{
 7827     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7828     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
 7829   %}
 7830   ins_pipe(ialu_reg_reg);
 7831 %}
 7832 
 7833 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
 7834 %{
 7835   predicate(VM_Version::supports_fast_3op_lea());
 7836   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
 7837 
 7838   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
 7839   ins_encode %{
 7840     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7841     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 7842   %}
 7843   ins_pipe(ialu_reg_reg);
 7844 %}
 7845 
 7846 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 7847 %{
 7848   match(Set dst (AddL dst src));
 7849   effect(KILL cr);
 7850 
 7851   format %{ "addq    $dst, $src\t# long" %}
 7852   ins_encode %{
 7853     __ addq($dst$$Register, $src$$Register);
 7854   %}
 7855   ins_pipe(ialu_reg_reg);
 7856 %}
 7857 
 7858 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
 7859 %{
 7860   match(Set dst (AddL dst src));
 7861   effect(KILL cr);
 7862 
 7863   format %{ "addq    $dst, $src\t# long" %}
 7864   ins_encode %{
 7865     __ addq($dst$$Register, $src$$constant);
 7866   %}
 7867   ins_pipe( ialu_reg );
 7868 %}
 7869 
 7870 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 7871 %{
 7872   match(Set dst (AddL dst (LoadL src)));
 7873   effect(KILL cr);
 7874 
 7875   ins_cost(150); // XXX
 7876   format %{ "addq    $dst, $src\t# long" %}
 7877   ins_encode %{
 7878     __ addq($dst$$Register, $src$$Address);
 7879   %}
 7880   ins_pipe(ialu_reg_mem);
 7881 %}
 7882 
 7883 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 7884 %{
 7885   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7886   effect(KILL cr);
 7887 
 7888   ins_cost(150); // XXX
 7889   format %{ "addq    $dst, $src\t# long" %}
 7890   ins_encode %{
 7891     __ addq($dst$$Address, $src$$Register);
 7892   %}
 7893   ins_pipe(ialu_mem_reg);
 7894 %}
 7895 
 7896 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
 7897 %{
 7898   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7899   effect(KILL cr);
 7900 
 7901   ins_cost(125); // XXX
 7902   format %{ "addq    $dst, $src\t# long" %}
 7903   ins_encode %{
 7904     __ addq($dst$$Address, $src$$constant);
 7905   %}
 7906   ins_pipe(ialu_mem_imm);
 7907 %}
 7908 
 7909 instruct incL_rReg(rRegI dst, immL1 src, rFlagsReg cr)
 7910 %{
 7911   predicate(UseIncDec);
 7912   match(Set dst (AddL dst src));
 7913   effect(KILL cr);
 7914 
 7915   format %{ "incq    $dst\t# long" %}
 7916   ins_encode %{
 7917     __ incrementq($dst$$Register);
 7918   %}
 7919   ins_pipe(ialu_reg);
 7920 %}
 7921 
 7922 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
 7923 %{
 7924   predicate(UseIncDec);
 7925   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7926   effect(KILL cr);
 7927 
 7928   ins_cost(125); // XXX
 7929   format %{ "incq    $dst\t# long" %}
 7930   ins_encode %{
 7931     __ incrementq($dst$$Address);
 7932   %}
 7933   ins_pipe(ialu_mem_imm);
 7934 %}
 7935 
 7936 // XXX why does that use AddL
 7937 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
 7938 %{
 7939   predicate(UseIncDec);
 7940   match(Set dst (AddL dst src));
 7941   effect(KILL cr);
 7942 
 7943   format %{ "decq    $dst\t# long" %}
 7944   ins_encode %{
 7945     __ decrementq($dst$$Register);
 7946   %}
 7947   ins_pipe(ialu_reg);
 7948 %}
 7949 
 7950 // XXX why does that use AddL
 7951 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
 7952 %{
 7953   predicate(UseIncDec);
 7954   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
 7955   effect(KILL cr);
 7956 
 7957   ins_cost(125); // XXX
 7958   format %{ "decq    $dst\t# long" %}
 7959   ins_encode %{
 7960     __ decrementq($dst$$Address);
 7961   %}
 7962   ins_pipe(ialu_mem_imm);
 7963 %}
 7964 
 7965 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
 7966 %{
 7967   predicate(VM_Version::supports_fast_2op_lea());
 7968   match(Set dst (AddL (LShiftL index scale) disp));
 7969 
 7970   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
 7971   ins_encode %{
 7972     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7973     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
 7974   %}
 7975   ins_pipe(ialu_reg_reg);
 7976 %}
 7977 
 7978 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
 7979 %{
 7980   predicate(VM_Version::supports_fast_3op_lea());
 7981   match(Set dst (AddL (AddL base index) disp));
 7982 
 7983   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
 7984   ins_encode %{
 7985     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
 7986   %}
 7987   ins_pipe(ialu_reg_reg);
 7988 %}
 7989 
 7990 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
 7991 %{
 7992   predicate(VM_Version::supports_fast_2op_lea());
 7993   match(Set dst (AddL base (LShiftL index scale)));
 7994 
 7995   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
 7996   ins_encode %{
 7997     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 7998     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
 7999   %}
 8000   ins_pipe(ialu_reg_reg);
 8001 %}
 8002 
 8003 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
 8004 %{
 8005   predicate(VM_Version::supports_fast_3op_lea());
 8006   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
 8007 
 8008   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
 8009   ins_encode %{
 8010     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
 8011     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
 8012   %}
 8013   ins_pipe(ialu_reg_reg);
 8014 %}
 8015 
 8016 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
 8017 %{
 8018   match(Set dst (AddP dst src));
 8019   effect(KILL cr);
 8020 
 8021   format %{ "addq    $dst, $src\t# ptr" %}
 8022   ins_encode %{
 8023     __ addq($dst$$Register, $src$$Register);
 8024   %}
 8025   ins_pipe(ialu_reg_reg);
 8026 %}
 8027 
 8028 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
 8029 %{
 8030   match(Set dst (AddP dst src));
 8031   effect(KILL cr);
 8032 
 8033   format %{ "addq    $dst, $src\t# ptr" %}
 8034   ins_encode %{
 8035     __ addq($dst$$Register, $src$$constant);
 8036   %}
 8037   ins_pipe( ialu_reg );
 8038 %}
 8039 
 8040 // XXX addP mem ops ????
 8041 
// CheckCastPP on a pointer register: input and output share $dst, the
// declared size is 0 and the encoding is empty, so no code is emitted.
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
 8051 
// CastPP on a pointer register: input and output share $dst, the declared
// size is 0 and the encoding is empty, so no code is emitted.
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
 8061 
// CastII on an int register: input and output share $dst, the declared size
// is 0, the cost is 0 and the encoding is empty, so no code is emitted.
instruct castII(rRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
 8072 
// CastLL on a long register: input and output share $dst, the declared size
// is 0, the cost is 0 and the encoding is empty, so no code is emitted.
instruct castLL(rRegL dst)
%{
  match(Set dst (CastLL dst));

  size(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
 8083 
// CastFF on a float register: input and output share $dst, the declared size
// is 0, the cost is 0 and the encoding is empty, so no code is emitted.
instruct castFF(regF dst)
%{
  match(Set dst (CastFF dst));

  size(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
 8094 
// CastDD on a double register: input and output share $dst, the declared size
// is 0, the cost is 0 and the encoding is empty, so no code is emitted.
instruct castDD(regD dst)
%{
  match(Set dst (CastDD dst));

  size(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
 8105 
 8106 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 8107 instruct compareAndSwapP(rRegI res,
 8108                          memory mem_ptr,
 8109                          rax_RegP oldval, rRegP newval,
 8110                          rFlagsReg cr)
 8111 %{
 8112   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8113   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 8114   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 8115   effect(KILL cr, KILL oldval);
 8116 
 8117   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8118             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8119             "sete    $res\n\t"
 8120             "movzbl  $res, $res" %}
 8121   ins_encode %{
 8122     __ lock();
 8123     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8124     __ sete($res$$Register);
 8125     __ movzbl($res$$Register, $res$$Register);
 8126   %}
 8127   ins_pipe( pipe_cmpxchg );
 8128 %}
 8129 
 8130 instruct compareAndSwapL(rRegI res,
 8131                          memory mem_ptr,
 8132                          rax_RegL oldval, rRegL newval,
 8133                          rFlagsReg cr)
 8134 %{
 8135   predicate(VM_Version::supports_cx8());
 8136   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 8137   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 8138   effect(KILL cr, KILL oldval);
 8139 
 8140   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8141             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8142             "sete    $res\n\t"
 8143             "movzbl  $res, $res" %}
 8144   ins_encode %{
 8145     __ lock();
 8146     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8147     __ sete($res$$Register);
 8148     __ movzbl($res$$Register, $res$$Register);
 8149   %}
 8150   ins_pipe( pipe_cmpxchg );
 8151 %}
 8152 
 8153 instruct compareAndSwapI(rRegI res,
 8154                          memory mem_ptr,
 8155                          rax_RegI oldval, rRegI newval,
 8156                          rFlagsReg cr)
 8157 %{
 8158   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 8159   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 8160   effect(KILL cr, KILL oldval);
 8161 
 8162   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8163             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8164             "sete    $res\n\t"
 8165             "movzbl  $res, $res" %}
 8166   ins_encode %{
 8167     __ lock();
 8168     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8169     __ sete($res$$Register);
 8170     __ movzbl($res$$Register, $res$$Register);
 8171   %}
 8172   ins_pipe( pipe_cmpxchg );
 8173 %}
 8174 
 8175 instruct compareAndSwapB(rRegI res,
 8176                          memory mem_ptr,
 8177                          rax_RegI oldval, rRegI newval,
 8178                          rFlagsReg cr)
 8179 %{
 8180   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 8181   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 8182   effect(KILL cr, KILL oldval);
 8183 
 8184   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8185             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8186             "sete    $res\n\t"
 8187             "movzbl  $res, $res" %}
 8188   ins_encode %{
 8189     __ lock();
 8190     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 8191     __ sete($res$$Register);
 8192     __ movzbl($res$$Register, $res$$Register);
 8193   %}
 8194   ins_pipe( pipe_cmpxchg );
 8195 %}
 8196 
 8197 instruct compareAndSwapS(rRegI res,
 8198                          memory mem_ptr,
 8199                          rax_RegI oldval, rRegI newval,
 8200                          rFlagsReg cr)
 8201 %{
 8202   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 8203   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 8204   effect(KILL cr, KILL oldval);
 8205 
 8206   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8207             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8208             "sete    $res\n\t"
 8209             "movzbl  $res, $res" %}
 8210   ins_encode %{
 8211     __ lock();
 8212     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 8213     __ sete($res$$Register);
 8214     __ movzbl($res$$Register, $res$$Register);
 8215   %}
 8216   ins_pipe( pipe_cmpxchg );
 8217 %}
 8218 
 8219 instruct compareAndSwapN(rRegI res,
 8220                           memory mem_ptr,
 8221                           rax_RegN oldval, rRegN newval,
 8222                           rFlagsReg cr) %{
 8223   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
 8224   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
 8225   effect(KILL cr, KILL oldval);
 8226 
 8227   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8228             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
 8229             "sete    $res\n\t"
 8230             "movzbl  $res, $res" %}
 8231   ins_encode %{
 8232     __ lock();
 8233     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8234     __ sete($res$$Register);
 8235     __ movzbl($res$$Register, $res$$Register);
 8236   %}
 8237   ins_pipe( pipe_cmpxchg );
 8238 %}
 8239 
 8240 instruct compareAndExchangeB(
 8241                          memory mem_ptr,
 8242                          rax_RegI oldval, rRegI newval,
 8243                          rFlagsReg cr)
 8244 %{
 8245   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 8246   effect(KILL cr);
 8247 
 8248   format %{ "cmpxchgb $mem_ptr,$newval\t# "
 8249             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8250   ins_encode %{
 8251     __ lock();
 8252     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
 8253   %}
 8254   ins_pipe( pipe_cmpxchg );
 8255 %}
 8256 
 8257 instruct compareAndExchangeS(
 8258                          memory mem_ptr,
 8259                          rax_RegI oldval, rRegI newval,
 8260                          rFlagsReg cr)
 8261 %{
 8262   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 8263   effect(KILL cr);
 8264 
 8265   format %{ "cmpxchgw $mem_ptr,$newval\t# "
 8266             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8267   ins_encode %{
 8268     __ lock();
 8269     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
 8270   %}
 8271   ins_pipe( pipe_cmpxchg );
 8272 %}
 8273 
 8274 instruct compareAndExchangeI(
 8275                          memory mem_ptr,
 8276                          rax_RegI oldval, rRegI newval,
 8277                          rFlagsReg cr)
 8278 %{
 8279   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 8280   effect(KILL cr);
 8281 
 8282   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8283             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8284   ins_encode %{
 8285     __ lock();
 8286     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8287   %}
 8288   ins_pipe( pipe_cmpxchg );
 8289 %}
 8290 
 8291 instruct compareAndExchangeL(
 8292                          memory mem_ptr,
 8293                          rax_RegL oldval, rRegL newval,
 8294                          rFlagsReg cr)
 8295 %{
 8296   predicate(VM_Version::supports_cx8());
 8297   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 8298   effect(KILL cr);
 8299 
 8300   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8301             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
 8302   ins_encode %{
 8303     __ lock();
 8304     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8305   %}
 8306   ins_pipe( pipe_cmpxchg );
 8307 %}
 8308 
 8309 instruct compareAndExchangeN(
 8310                           memory mem_ptr,
 8311                           rax_RegN oldval, rRegN newval,
 8312                           rFlagsReg cr) %{
 8313   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
 8314   effect(KILL cr);
 8315 
 8316   format %{ "cmpxchgl $mem_ptr,$newval\t# "
 8317             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8318   ins_encode %{
 8319     __ lock();
 8320     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
 8321   %}
 8322   ins_pipe( pipe_cmpxchg );
 8323 %}
 8324 
 8325 instruct compareAndExchangeP(
 8326                          memory mem_ptr,
 8327                          rax_RegP oldval, rRegP newval,
 8328                          rFlagsReg cr)
 8329 %{
 8330   predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
 8331   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 8332   effect(KILL cr);
 8333 
 8334   format %{ "cmpxchgq $mem_ptr,$newval\t# "
 8335             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
 8336   ins_encode %{
 8337     __ lock();
 8338     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
 8339   %}
 8340   ins_pipe( pipe_cmpxchg );
 8341 %}
 8342 
 8343 instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 8344   predicate(n->as_LoadStore()->result_not_used());
 8345   match(Set dummy (GetAndAddB mem add));
 8346   effect(KILL cr);
 8347   format %{ "ADDB  [$mem],$add" %}
 8348   ins_encode %{
 8349     __ lock();
 8350     __ addb($mem$$Address, $add$$constant);
 8351   %}
 8352   ins_pipe( pipe_cmpxchg );
 8353 %}
 8354 
 8355 instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{
 8356   match(Set newval (GetAndAddB mem newval));
 8357   effect(KILL cr);
 8358   format %{ "XADDB  [$mem],$newval" %}
 8359   ins_encode %{
 8360     __ lock();
 8361     __ xaddb($mem$$Address, $newval$$Register);
 8362   %}
 8363   ins_pipe( pipe_cmpxchg );
 8364 %}
 8365 
 8366 instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 8367   predicate(n->as_LoadStore()->result_not_used());
 8368   match(Set dummy (GetAndAddS mem add));
 8369   effect(KILL cr);
 8370   format %{ "ADDW  [$mem],$add" %}
 8371   ins_encode %{
 8372     __ lock();
 8373     __ addw($mem$$Address, $add$$constant);
 8374   %}
 8375   ins_pipe( pipe_cmpxchg );
 8376 %}
 8377 
 8378 instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{
 8379   match(Set newval (GetAndAddS mem newval));
 8380   effect(KILL cr);
 8381   format %{ "XADDW  [$mem],$newval" %}
 8382   ins_encode %{
 8383     __ lock();
 8384     __ xaddw($mem$$Address, $newval$$Register);
 8385   %}
 8386   ins_pipe( pipe_cmpxchg );
 8387 %}
 8388 
 8389 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
 8390   predicate(n->as_LoadStore()->result_not_used());
 8391   match(Set dummy (GetAndAddI mem add));
 8392   effect(KILL cr);
 8393   format %{ "ADDL  [$mem],$add" %}
 8394   ins_encode %{
 8395     __ lock();
 8396     __ addl($mem$$Address, $add$$constant);
 8397   %}
 8398   ins_pipe( pipe_cmpxchg );
 8399 %}
 8400 
 8401 instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
 8402   match(Set newval (GetAndAddI mem newval));
 8403   effect(KILL cr);
 8404   format %{ "XADDL  [$mem],$newval" %}
 8405   ins_encode %{
 8406     __ lock();
 8407     __ xaddl($mem$$Address, $newval$$Register);
 8408   %}
 8409   ins_pipe( pipe_cmpxchg );
 8410 %}
 8411 
 8412 instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
 8413   predicate(n->as_LoadStore()->result_not_used());
 8414   match(Set dummy (GetAndAddL mem add));
 8415   effect(KILL cr);
 8416   format %{ "ADDQ  [$mem],$add" %}
 8417   ins_encode %{
 8418     __ lock();
 8419     __ addq($mem$$Address, $add$$constant);
 8420   %}
 8421   ins_pipe( pipe_cmpxchg );
 8422 %}
 8423 
 8424 instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
 8425   match(Set newval (GetAndAddL mem newval));
 8426   effect(KILL cr);
 8427   format %{ "XADDQ  [$mem],$newval" %}
 8428   ins_encode %{
 8429     __ lock();
 8430     __ xaddq($mem$$Address, $newval$$Register);
 8431   %}
 8432   ins_pipe( pipe_cmpxchg );
 8433 %}
 8434 
 8435 instruct xchgB( memory mem, rRegI newval) %{
 8436   match(Set newval (GetAndSetB mem newval));
 8437   format %{ "XCHGB  $newval,[$mem]" %}
 8438   ins_encode %{
 8439     __ xchgb($newval$$Register, $mem$$Address);
 8440   %}
 8441   ins_pipe( pipe_cmpxchg );
 8442 %}
 8443 
 8444 instruct xchgS( memory mem, rRegI newval) %{
 8445   match(Set newval (GetAndSetS mem newval));
 8446   format %{ "XCHGW  $newval,[$mem]" %}
 8447   ins_encode %{
 8448     __ xchgw($newval$$Register, $mem$$Address);
 8449   %}
 8450   ins_pipe( pipe_cmpxchg );
 8451 %}
 8452 
 8453 instruct xchgI( memory mem, rRegI newval) %{
 8454   match(Set newval (GetAndSetI mem newval));
 8455   format %{ "XCHGL  $newval,[$mem]" %}
 8456   ins_encode %{
 8457     __ xchgl($newval$$Register, $mem$$Address);
 8458   %}
 8459   ins_pipe( pipe_cmpxchg );
 8460 %}
 8461 
 8462 instruct xchgL( memory mem, rRegL newval) %{
 8463   match(Set newval (GetAndSetL mem newval));
 8464   format %{ "XCHGL  $newval,[$mem]" %}
 8465   ins_encode %{
 8466     __ xchgq($newval$$Register, $mem$$Address);
 8467   %}
 8468   ins_pipe( pipe_cmpxchg );
 8469 %}
 8470 
 8471 instruct xchgP( memory mem, rRegP newval) %{
 8472   match(Set newval (GetAndSetP mem newval));
 8473   predicate(n->as_LoadStore()->barrier_data() == 0);
 8474   format %{ "XCHGQ  $newval,[$mem]" %}
 8475   ins_encode %{
 8476     __ xchgq($newval$$Register, $mem$$Address);
 8477   %}
 8478   ins_pipe( pipe_cmpxchg );
 8479 %}
 8480 
 8481 instruct xchgN( memory mem, rRegN newval) %{
 8482   match(Set newval (GetAndSetN mem newval));
 8483   format %{ "XCHGL  $newval,$mem]" %}
 8484   ins_encode %{
 8485     __ xchgl($newval$$Register, $mem$$Address);
 8486   %}
 8487   ins_pipe( pipe_cmpxchg );
 8488 %}
 8489 
 8490 //----------Abs Instructions-------------------------------------------
 8491 
 8492 // Integer Absolute Instructions
 8493 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, rFlagsReg cr)
 8494 %{
 8495   match(Set dst (AbsI src));
 8496   effect(TEMP dst, TEMP tmp, KILL cr);
 8497   format %{ "movl $tmp, $src\n\t"
 8498             "sarl $tmp, 31\n\t"
 8499             "movl $dst, $src\n\t"
 8500             "xorl $dst, $tmp\n\t"
 8501             "subl $dst, $tmp\n"
 8502           %}
 8503   ins_encode %{
 8504     __ movl($tmp$$Register, $src$$Register);
 8505     __ sarl($tmp$$Register, 31);
 8506     __ movl($dst$$Register, $src$$Register);
 8507     __ xorl($dst$$Register, $tmp$$Register);
 8508     __ subl($dst$$Register, $tmp$$Register);
 8509   %}
 8510 
 8511   ins_pipe(ialu_reg_reg);
 8512 %}
 8513 
 8514 // Long Absolute Instructions
 8515 instruct absL_rReg(rRegL dst, rRegL src, rRegL tmp, rFlagsReg cr)
 8516 %{
 8517   match(Set dst (AbsL src));
 8518   effect(TEMP dst, TEMP tmp, KILL cr);
 8519   format %{ "movq $tmp, $src\n\t"
 8520             "sarq $tmp, 63\n\t"
 8521             "movq $dst, $src\n\t"
 8522             "xorq $dst, $tmp\n\t"
 8523             "subq $dst, $tmp\n"
 8524           %}
 8525   ins_encode %{
 8526     __ movq($tmp$$Register, $src$$Register);
 8527     __ sarq($tmp$$Register, 63);
 8528     __ movq($dst$$Register, $src$$Register);
 8529     __ xorq($dst$$Register, $tmp$$Register);
 8530     __ subq($dst$$Register, $tmp$$Register);
 8531   %}
 8532 
 8533   ins_pipe(ialu_reg_reg);
 8534 %}
 8535 
 8536 //----------Subtraction Instructions-------------------------------------------
 8537 
 8538 // Integer Subtraction Instructions
 8539 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8540 %{
 8541   match(Set dst (SubI dst src));
 8542   effect(KILL cr);
 8543 
 8544   format %{ "subl    $dst, $src\t# int" %}
 8545   ins_encode %{
 8546     __ subl($dst$$Register, $src$$Register);
 8547   %}
 8548   ins_pipe(ialu_reg_reg);
 8549 %}
 8550 
 8551 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 8552 %{
 8553   match(Set dst (SubI dst (LoadI src)));
 8554   effect(KILL cr);
 8555 
 8556   ins_cost(150);
 8557   format %{ "subl    $dst, $src\t# int" %}
 8558   ins_encode %{
 8559     __ subl($dst$$Register, $src$$Address);
 8560   %}
 8561   ins_pipe(ialu_reg_mem);
 8562 %}
 8563 
 8564 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 8565 %{
 8566   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 8567   effect(KILL cr);
 8568 
 8569   ins_cost(150);
 8570   format %{ "subl    $dst, $src\t# int" %}
 8571   ins_encode %{
 8572     __ subl($dst$$Address, $src$$Register);
 8573   %}
 8574   ins_pipe(ialu_mem_reg);
 8575 %}
 8576 
 8577 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8578 %{ // 64-bit subtract, register from register: dst = dst - src
 8579   match(Set dst (SubL dst src));
 8580   effect(KILL cr); // the flags register is declared killed
 8581 
 8582   format %{ "subq    $dst, $src\t# long" %}
 8583   ins_encode %{
 8584     __ subq($dst$$Register, $src$$Register);
 8585   %}
 8586   ins_pipe(ialu_reg_reg);
 8587 %}
 8588 
 8589 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
 8590 %{ // 64-bit subtract with the LoadL of src folded in as a memory operand: dst = dst - [src]
 8591   match(Set dst (SubL dst (LoadL src)));
 8592   effect(KILL cr); // the flags register is declared killed
 8593 
 8594   ins_cost(150);
 8595   format %{ "subq    $dst, $src\t# long" %}
 8596   ins_encode %{
 8597     __ subq($dst$$Register, $src$$Address);
 8598   %}
 8599   ins_pipe(ialu_reg_mem);
 8600 %}
 8601 
 8602 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
 8603 %{ // 64-bit read-modify-write: the load, subtract and store on the same address become one subq to memory
 8604   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
 8605   effect(KILL cr); // the flags register is declared killed
 8606 
 8607   ins_cost(150);
 8608   format %{ "subq    $dst, $src\t# long" %}
 8609   ins_encode %{
 8610     __ subq($dst$$Address, $src$$Register);
 8611   %}
 8612   ins_pipe(ialu_mem_reg);
 8613 %}
 8614 
 8615 // Subtract from a pointer: matches an AddP whose offset is (0 - src) and emits a subtract instead
 8616 // XXX hmpf???
 8617 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
 8618 %{ // NOTE(review): src is a 32-bit int operand while the format shows a 64-bit subq - confirm this is intended
 8619   match(Set dst (AddP dst (SubI zero src)));
 8620   effect(KILL cr); // the flags register is declared killed
 8621 
 8622   format %{ "subq    $dst, $src\t# ptr - int" %}
 8623   opcode(0x2B); // primary opcode consumed by the encoding pieces on the next line
 8624   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src)); // encoding classes are defined elsewhere in this file
 8625   ins_pipe(ialu_reg_reg);
 8626 %}
 8627 
 8628 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
 8629 %{ // 32-bit negate in place, recognised from the (0 - dst) subtraction shape
 8630   match(Set dst (SubI zero dst));
 8631   effect(KILL cr); // the flags register is declared killed
 8632 
 8633   format %{ "negl    $dst\t# int" %}
 8634   ins_encode %{
 8635     __ negl($dst$$Register);
 8636   %}
 8637   ins_pipe(ialu_reg);
 8638 %}
 8639 
 8640 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
 8641 %{ // 32-bit negate in place, matched from an explicit NegI node (negI_rReg covers the 0 - dst shape)
 8642   match(Set dst (NegI dst));
 8643   effect(KILL cr); // the flags register is declared killed
 8644 
 8645   format %{ "negl    $dst\t# int" %}
 8646   ins_encode %{
 8647     __ negl($dst$$Register);
 8648   %}
 8649   ins_pipe(ialu_reg);
 8650 %}
 8651 
 8652 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
 8653 %{ // 32-bit negate directly in memory: load, (0 - value), store on the same address become one negl
 8654   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
 8655   effect(KILL cr); // the flags register is declared killed
 8656 
 8657   format %{ "negl    $dst\t# int" %}
 8658   ins_encode %{
 8659     __ negl($dst$$Address);
 8660   %}
 8661   ins_pipe(ialu_reg); // NOTE(review): register pipe class on a memory-destination rule - confirm intended
 8662 %}
 8663 
 8664 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
 8665 %{ // 64-bit negate in place, recognised from the (0 - dst) subtraction shape
 8666   match(Set dst (SubL zero dst));
 8667   effect(KILL cr); // the flags register is declared killed
 8668 
 8669   format %{ "negq    $dst\t# long" %}
 8670   ins_encode %{
 8671     __ negq($dst$$Register);
 8672   %}
 8673   ins_pipe(ialu_reg);
 8674 %}
 8675 
 8676 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
 8677 %{ // 64-bit negate in place, matched from an explicit NegL node (negL_rReg covers the 0 - dst shape)
 8678   match(Set dst (NegL dst));
 8679   effect(KILL cr); // the flags register is declared killed
 8680 
 8681   format %{ "negq    $dst\t# long" %}
 8682   ins_encode %{
 8683     __ negq($dst$$Register);
 8684   %}
 8685   ins_pipe(ialu_reg);
 8686 %}
 8687 
 8688 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
 8689 %{ // 64-bit negate directly in memory: load, (0 - value), store on the same address become one negq
 8690   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
 8691   effect(KILL cr); // the flags register is declared killed
 8692 
 8693   format %{ "negq    $dst\t# long" %}
 8694   ins_encode %{
 8695     __ negq($dst$$Address);
 8696   %}
 8697   ins_pipe(ialu_reg); // NOTE(review): register pipe class on a memory-destination rule - confirm intended
 8698 %}
 8699 
 8700 //----------Multiplication/Division Instructions-------------------------------
 8701 // Integer Multiplication Instructions
 8702 // Multiply Register
 8703 
 8704 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 8705 %{ // 32-bit multiply, two-operand form: dst = dst * src
 8706   match(Set dst (MulI dst src));
 8707   effect(KILL cr); // the flags register is declared killed
 8708 
 8709   ins_cost(300);
 8710   format %{ "imull   $dst, $src\t# int" %}
 8711   ins_encode %{
 8712     __ imull($dst$$Register, $src$$Register);
 8713   %}
 8714   ins_pipe(ialu_reg_reg_alu0);
 8715 %}
 8716 
 8717 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
 8718 %{ // 32-bit multiply by a constant, three-operand form: dst = src * imm (dst need not equal src)
 8719   match(Set dst (MulI src imm));
 8720   effect(KILL cr); // the flags register is declared killed
 8721 
 8722   ins_cost(300);
 8723   format %{ "imull   $dst, $src, $imm\t# int" %}
 8724   ins_encode %{
 8725     __ imull($dst$$Register, $src$$Register, $imm$$constant);
 8726   %}
 8727   ins_pipe(ialu_reg_reg_alu0);
 8728 %}
 8729 
 8730 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
 8731 %{ // 32-bit multiply with the LoadI of src folded in as a memory operand: dst = dst * [src]
 8732   match(Set dst (MulI dst (LoadI src)));
 8733   effect(KILL cr); // the flags register is declared killed
 8734 
 8735   ins_cost(350);
 8736   format %{ "imull   $dst, $src\t# int" %}
 8737   ins_encode %{
 8738     __ imull($dst$$Register, $src$$Address);
 8739   %}
 8740   ins_pipe(ialu_reg_mem_alu0);
 8741 %}
 8742 
 8743 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
 8744 %{ // 32-bit multiply of a loaded value by a constant: dst = [src] * imm
 8745   match(Set dst (MulI (LoadI src) imm));
 8746   effect(KILL cr); // the flags register is declared killed
 8747 
 8748   ins_cost(300); // NOTE(review): lower than mulI_mem's 350 although this form also reads memory - confirm intended
 8749   format %{ "imull   $dst, $src, $imm\t# int" %}
 8750   ins_encode %{
 8751     __ imull($dst$$Register, $src$$Address, $imm$$constant);
 8752   %}
 8753   ins_pipe(ialu_reg_mem_alu0);
 8754 %}
 8755 
 8756 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
 8757 %{ // dst = dst * src1 + src2 * src3, expanded into two multiplies and an add rather than encoded directly
 8758   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 8759   effect(KILL cr, KILL src2); // src2 is overwritten with the second product, so it is declared killed
 8760 
 8761   expand %{ mulI_rReg(dst, src1, cr);
 8762            mulI_rReg(src2, src3, cr);
 8763            addI_rReg(dst, src2, cr); %}
 8764 %}
 8765 
 8766 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
 8767 %{ // 64-bit multiply, two-operand form: dst = dst * src
 8768   match(Set dst (MulL dst src));
 8769   effect(KILL cr); // the flags register is declared killed
 8770 
 8771   ins_cost(300);
 8772   format %{ "imulq   $dst, $src\t# long" %}
 8773   ins_encode %{
 8774     __ imulq($dst$$Register, $src$$Register);
 8775   %}
 8776   ins_pipe(ialu_reg_reg_alu0);
 8777 %}
 8778 
 8779 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
 8780 %{ // 64-bit multiply by an immL32 constant, three-operand form: dst = src * imm
 8781   match(Set dst (MulL src imm));
 8782   effect(KILL cr); // the flags register is declared killed
 8783 
 8784   ins_cost(300);
 8785   format %{ "imulq   $dst, $src, $imm\t# long" %}
 8786   ins_encode %{
 8787     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
 8788   %}
 8789   ins_pipe(ialu_reg_reg_alu0);
 8790 %}
 8791 
 8792 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
 8793 %{ // 64-bit multiply with the LoadL of src folded in as a memory operand: dst = dst * [src]
 8794   match(Set dst (MulL dst (LoadL src)));
 8795   effect(KILL cr); // the flags register is declared killed
 8796 
 8797   ins_cost(350);
 8798   format %{ "imulq   $dst, $src\t# long" %}
 8799   ins_encode %{
 8800     __ imulq($dst$$Register, $src$$Address);
 8801   %}
 8802   ins_pipe(ialu_reg_mem_alu0);
 8803 %}
 8804 
 8805 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
 8806 %{ // 64-bit multiply of a loaded value by an immL32 constant: dst = [src] * imm
 8807   match(Set dst (MulL (LoadL src) imm));
 8808   effect(KILL cr); // the flags register is declared killed
 8809 
 8810   ins_cost(300); // NOTE(review): lower than mulL_mem's 350 although this form also reads memory - confirm intended
 8811   format %{ "imulq   $dst, $src, $imm\t# long" %}
 8812   ins_encode %{
 8813     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
 8814   %}
 8815   ins_pipe(ialu_reg_mem_alu0);
 8816 %}
 8817 
 8818 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 8819 %{ // Signed high half of a 64x64 multiply: one-operand imulq, result taken from RDX (dst is the rdx operand class)
 8820   match(Set dst (MulHiL src rax));
 8821   effect(USE_KILL rax, KILL cr); // rax is both an input and overwritten (format shows RDX:RAX as the product)
 8822 
 8823   ins_cost(300);
 8824   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
 8825   ins_encode %{
 8826     __ imulq($src$$Register);
 8827   %}
 8828   ins_pipe(ialu_reg_reg_alu0);
 8829 %}
 8830 
 8831 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
 8832 %{ // Unsigned high half of a 64x64 multiply: one-operand mulq, result taken from RDX (dst is the rdx operand class)
 8833   match(Set dst (UMulHiL src rax));
 8834   effect(USE_KILL rax, KILL cr); // rax is both an input and overwritten (format shows RDX:RAX as the product)
 8835 
 8836   ins_cost(300);
 8837   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
 8838   ins_encode %{
 8839     __ mulq($src$$Register);
 8840   %}
 8841   ins_pipe(ialu_reg_reg_alu0);
 8842 %}
 8843 
 8844 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
 8845                    rFlagsReg cr)
 8846 %{ // Signed 32-bit divide: quotient in rax; the divisor operand class excludes rax and rdx
 8847   match(Set rax (DivI rax div));
 8848   effect(KILL rdx, KILL cr); // rdx is overwritten by the sequence but is not a result of this rule
 8849 
 8850   ins_cost(30*100+10*100); // XXX
 8851   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
 8852             "jne,s   normal\n\t"
 8853             "xorl    rdx, rdx\n\t"
 8854             "cmpl    $div, -1\n\t"
 8855             "je,s    done\n"
 8856     "normal: cdql\n\t"
 8857             "idivl   $div\n"
 8858     "done:"        %}
 8859   ins_encode(cdql_enc(div)); // encoding class defined elsewhere; the format above shows idivl being skipped for 0x80000000 / -1
 8860   ins_pipe(ialu_reg_reg_alu0);
 8861 %}
 8862 
 8863 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
 8864                    rFlagsReg cr)
 8865 %{ // Signed 64-bit divide: quotient in rax; the divisor operand class excludes rax and rdx
 8866   match(Set rax (DivL rax div));
 8867   effect(KILL rdx, KILL cr); // rdx is overwritten by the sequence but is not a result of this rule
 8868 
 8869   ins_cost(30*100+10*100); // XXX
 8870   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
 8871             "cmpq    rax, rdx\n\t"
 8872             "jne,s   normal\n\t"
 8873             "xorl    rdx, rdx\n\t"
 8874             "cmpq    $div, -1\n\t"
 8875             "je,s    done\n"
 8876     "normal: cdqq\n\t"
 8877             "idivq   $div\n"
 8878     "done:"        %}
 8879   ins_encode(cdqq_enc(div)); // encoding class defined elsewhere; the format above shows idivq being skipped for min-long / -1
 8880   ins_pipe(ialu_reg_reg_alu0);
 8881 %}
 8882 
 8883 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
 8884 %{ // Unsigned 32-bit divide: quotient in rax; the divisor operand class excludes rax and rdx
 8885   match(Set rax (UDivI rax div));
 8886   effect(KILL rdx, KILL cr); // rdx is handed to the helper and declared killed, not produced as a result
 8887 
 8888   ins_cost(300);
 8889   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
 8890   ins_encode %{
 8891     __ udivI($rax$$Register, $div$$Register, $rdx$$Register); // assembler helper defined outside this file
 8892   %}
 8893   ins_pipe(ialu_reg_reg_alu0);
 8894 %}
 8895 
 8896 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
 8897 %{ // Unsigned 64-bit divide: quotient in rax; the divisor operand class excludes rax and rdx
 8898   match(Set rax (UDivL rax div));
 8899   effect(KILL rdx, KILL cr); // rdx is handed to the helper and declared killed, not produced as a result
 8900 
 8901   ins_cost(300);
 8902   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
 8903   ins_encode %{
 8904      __ udivL($rax$$Register, $div$$Register, $rdx$$Register); // assembler helper defined outside this file
 8905   %}
 8906   ins_pipe(ialu_reg_reg_alu0);
 8907 %}
 8908 
 8909 // Integer DIVMOD with Register, both quotient and mod results
 8910 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
 8911                              rFlagsReg cr)
 8912 %{ // The match has no Set: the rule stands for the whole DivModI node rather than one destination
 8913   match(DivModI rax div);
 8914   effect(KILL cr); // only the flags are killed here; rdx is not killed as it is in divI_rReg
 8915 
 8916   ins_cost(30*100+10*100); // XXX
 8917   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
 8918             "jne,s   normal\n\t"
 8919             "xorl    rdx, rdx\n\t"
 8920             "cmpl    $div, -1\n\t"
 8921             "je,s    done\n"
 8922     "normal: cdql\n\t"
 8923             "idivl   $div\n"
 8924     "done:"        %}
 8925   ins_encode(cdql_enc(div)); // same encoding class as divI_rReg and modI_rReg
 8926   ins_pipe(pipe_slow);
 8927 %}
 8928 
 8929 // Long DIVMOD with Register, both quotient and mod results
 8930 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
 8931                              rFlagsReg cr)
 8932 %{ // The match has no Set: the rule stands for the whole DivModL node rather than one destination
 8933   match(DivModL rax div);
 8934   effect(KILL cr); // only the flags are killed here; rdx is not killed as it is in divL_rReg
 8935 
 8936   ins_cost(30*100+10*100); // XXX
 8937   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
 8938             "cmpq    rax, rdx\n\t"
 8939             "jne,s   normal\n\t"
 8940             "xorl    rdx, rdx\n\t"
 8941             "cmpq    $div, -1\n\t"
 8942             "je,s    done\n"
 8943     "normal: cdqq\n\t"
 8944             "idivq   $div\n"
 8945     "done:"        %}
 8946   ins_encode(cdqq_enc(div)); // same encoding class as divL_rReg and modL_rReg
 8947   ins_pipe(pipe_slow);
 8948 %}
 8949 
 8950 // Unsigned integer DIVMOD with Register, both quotient and mod results
 8951 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
 8952                               no_rax_rdx_RegI div, rFlagsReg cr)
 8953 %{ // The match has no Set: the rule stands for the whole UDivModI node rather than one destination
 8954   match(UDivModI rax div);
 8955   effect(TEMP tmp, KILL cr); // tmp is an extra scratch register drawn from a class that excludes rax and rdx
 8956 
 8957   ins_cost(300);
 8958   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
 8959             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
 8960           %}
 8961   ins_encode %{
 8962     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register); // assembler helper defined outside this file
 8963   %}
 8964   ins_pipe(pipe_slow);
 8965 %}
 8966 
 8967 // Unsigned long DIVMOD with Register, both quotient and mod results
 8968 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
 8969                               no_rax_rdx_RegL div, rFlagsReg cr)
 8970 %{ // The match has no Set: the rule stands for the whole UDivModL node rather than one destination
 8971   match(UDivModL rax div);
 8972   effect(TEMP tmp, KILL cr); // tmp is an extra scratch register drawn from a class that excludes rax and rdx
 8973 
 8974   ins_cost(300);
 8975   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
 8976             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
 8977           %}
 8978   ins_encode %{
 8979     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register); // assembler helper defined outside this file
 8980   %}
 8981   ins_pipe(pipe_slow);
 8982 %}
 8983 
 8984 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
 8985                    rFlagsReg cr)
 8986 %{ // Signed 32-bit remainder: result in rdx; the divisor operand class excludes rax and rdx
 8987   match(Set rdx (ModI rax div));
 8988   effect(KILL rax, KILL cr); // rax holds the dividend on entry and is overwritten by the sequence
 8989 
 8990   ins_cost(300); // XXX
 8991   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
 8992             "jne,s   normal\n\t"
 8993             "xorl    rdx, rdx\n\t"
 8994             "cmpl    $div, -1\n\t"
 8995             "je,s    done\n"
 8996     "normal: cdql\n\t"
 8997             "idivl   $div\n"
 8998     "done:"        %}
 8999   ins_encode(cdql_enc(div)); // same encoding class as divI_rReg; the format shows rdx zeroed on the 0x80000000 / -1 path
 9000   ins_pipe(ialu_reg_reg_alu0);
 9001 %}
 9002 
 9003 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
 9004                    rFlagsReg cr)
 9005 %{ // Signed 64-bit remainder: result in rdx; the divisor operand class excludes rax and rdx
 9006   match(Set rdx (ModL rax div));
 9007   effect(KILL rax, KILL cr); // rax holds the dividend on entry and is overwritten by the sequence
 9008 
 9009   ins_cost(300); // XXX
 9010   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
 9011             "cmpq    rax, rdx\n\t"
 9012             "jne,s   normal\n\t"
 9013             "xorl    rdx, rdx\n\t"
 9014             "cmpq    $div, -1\n\t"
 9015             "je,s    done\n"
 9016     "normal: cdqq\n\t"
 9017             "idivq   $div\n"
 9018     "done:"        %}
 9019   ins_encode(cdqq_enc(div)); // same encoding class as divL_rReg; the format shows rdx zeroed on the min-long / -1 path
 9020   ins_pipe(ialu_reg_reg_alu0);
 9021 %}
 9022 
 9023 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
 9024 %{ // Unsigned 32-bit remainder: result in rdx; the divisor operand class excludes rax and rdx
 9025   match(Set rdx (UModI rax div));
 9026   effect(KILL rax, KILL cr); // rax holds the dividend on entry and is declared killed
 9027 
 9028   ins_cost(300);
 9029   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
 9030   ins_encode %{
 9031     __ umodI($rax$$Register, $div$$Register, $rdx$$Register); // assembler helper defined outside this file
 9032   %}
 9033   ins_pipe(ialu_reg_reg_alu0);
 9034 %}
 9035 
 9036 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
 9037 %{ // Unsigned 64-bit remainder: result in rdx; the divisor operand class excludes rax and rdx
 9038   match(Set rdx (UModL rax div));
 9039   effect(KILL rax, KILL cr); // rax holds the dividend on entry and is declared killed
 9040 
 9041   ins_cost(300);
 9042   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
 9043   ins_encode %{
 9044     __ umodL($rax$$Register, $div$$Register, $rdx$$Register); // assembler helper defined outside this file
 9045   %}
 9046   ins_pipe(ialu_reg_reg_alu0);
 9047 %}
 9048 
 9049 // Integer Shift Instructions
 9050 // Shift Left by 8-bit immediate (32-bit register, shifted in place)
 9051 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9052 %{
 9053   match(Set dst (LShiftI dst shift));
 9054   effect(KILL cr); // the flags register is declared killed
 9055 
 9056   format %{ "sall    $dst, $shift" %}
 9057   ins_encode %{
 9058     __ sall($dst$$Register, $shift$$constant);
 9059   %}
 9060   ins_pipe(ialu_reg);
 9061 %}
 9062 
 9063 // Shift Left by 8-bit immediate (32-bit value in memory; load, shift and store fused)
 9064 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9065 %{
 9066   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 9067   effect(KILL cr); // the flags register is declared killed
 9068 
 9069   format %{ "sall    $dst, $shift" %}
 9070   ins_encode %{
 9071     __ sall($dst$$Address, $shift$$constant);
 9072   %}
 9073   ins_pipe(ialu_mem_imm);
 9074 %}
 9075 
 9076 // Shift Left by variable (32-bit register; count operand is constrained to the rcx class)
 9077 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9078 %{
 9079   predicate(!VM_Version::supports_bmi2()); // only used without BMI2; salI_rReg_rReg covers the BMI2 case
 9080   match(Set dst (LShiftI dst shift));
 9081   effect(KILL cr); // the flags register is declared killed
 9082 
 9083   format %{ "sall    $dst, $shift" %}
 9084   ins_encode %{
 9085     __ sall($dst$$Register); // count is implicit: the shift operand is not passed to the assembler
 9086   %}
 9087   ins_pipe(ialu_reg_reg);
 9088 %}
 9089 
 9090 // Shift Left by variable (32-bit value in memory; count operand is constrained to the rcx class)
 9091 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9092 %{
 9093   predicate(!VM_Version::supports_bmi2()); // only used when BMI2 is not available
 9094   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
 9095   effect(KILL cr); // the flags register is declared killed
 9096 
 9097   format %{ "sall    $dst, $shift" %}
 9098   ins_encode %{
 9099     __ sall($dst$$Address); // count is implicit: the shift operand is not passed to the assembler
 9100   %}
 9101   ins_pipe(ialu_mem_reg);
 9102 %}
 9103 
 9104 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9105 %{ // BMI2 variable shift left (32-bit): three-operand form, any register as count, no flags operand declared
 9106   predicate(VM_Version::supports_bmi2()); // BMI2 counterpart of salI_rReg_CL
 9107   match(Set dst (LShiftI src shift));
 9108 
 9109   format %{ "shlxl   $dst, $src, $shift" %}
 9110   ins_encode %{
 9111     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
 9112   %}
 9113   ins_pipe(ialu_reg_reg);
 9114 %}
 9115 
 9116 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9117 %{ // BMI2 variable shift left (32-bit) with the LoadI of src folded in as a memory operand
 9118   predicate(VM_Version::supports_bmi2()); // requires BMI2
 9119   match(Set dst (LShiftI (LoadI src) shift));
 9120   ins_cost(175);
 9121   format %{ "shlxl   $dst, $src, $shift" %}
 9122   ins_encode %{
 9123     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
 9124   %}
 9125   ins_pipe(ialu_reg_mem);
 9126 %}
 9127 
 9128 // Arithmetic Shift Right by 8-bit immediate (32-bit register, shifted in place)
 9129 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9130 %{
 9131   match(Set dst (RShiftI dst shift));
 9132   effect(KILL cr); // the flags register is declared killed
 9133 
 9134   format %{ "sarl    $dst, $shift" %}
 9135   ins_encode %{
 9136     __ sarl($dst$$Register, $shift$$constant);
 9137   %}
 9138   ins_pipe(ialu_reg); // register-only rule: same pipe class as salI_rReg_imm and shrI_rReg_imm
 9139 %}
 9140 
 9141 // Arithmetic Shift Right by 8-bit immediate (32-bit value in memory; load, shift and store fused)
 9142 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9143 %{
 9144   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 9145   effect(KILL cr); // the flags register is declared killed
 9146 
 9147   format %{ "sarl    $dst, $shift" %}
 9148   ins_encode %{
 9149     __ sarl($dst$$Address, $shift$$constant);
 9150   %}
 9151   ins_pipe(ialu_mem_imm);
 9152 %}
 9153 
 9154 // Arithmetic Shift Right by variable (32-bit register; count operand is constrained to the rcx class)
 9155 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9156 %{
 9157   predicate(!VM_Version::supports_bmi2()); // only used without BMI2; sarI_rReg_rReg covers the BMI2 case
 9158   match(Set dst (RShiftI dst shift));
 9159   effect(KILL cr); // the flags register is declared killed
 9160   format %{ "sarl    $dst, $shift" %}
 9161   ins_encode %{
 9162     __ sarl($dst$$Register); // count is implicit: the shift operand is not passed to the assembler
 9163   %}
 9164   ins_pipe(ialu_reg_reg);
 9165 %}
 9166 
 9167 // Arithmetic Shift Right by variable (32-bit value in memory; count operand is constrained to the rcx class)
 9168 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9169 %{
 9170   predicate(!VM_Version::supports_bmi2()); // only used when BMI2 is not available
 9171   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 9172   effect(KILL cr); // the flags register is declared killed
 9173 
 9174   format %{ "sarl    $dst, $shift" %}
 9175   ins_encode %{
 9176     __ sarl($dst$$Address); // count is implicit: the shift operand is not passed to the assembler
 9177   %}
 9178   ins_pipe(ialu_mem_reg);
 9179 %}
 9180 
 9181 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9182 %{ // BMI2 variable arithmetic shift right (32-bit): three-operand form, any register as count, no flags operand declared
 9183   predicate(VM_Version::supports_bmi2()); // BMI2 counterpart of sarI_rReg_CL
 9184   match(Set dst (RShiftI src shift));
 9185 
 9186   format %{ "sarxl   $dst, $src, $shift" %}
 9187   ins_encode %{
 9188     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
 9189   %}
 9190   ins_pipe(ialu_reg_reg);
 9191 %}
 9192 
 9193 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9194 %{ // BMI2 variable arithmetic shift right (32-bit) with the LoadI of src folded in as a memory operand
 9195   predicate(VM_Version::supports_bmi2()); // requires BMI2
 9196   match(Set dst (RShiftI (LoadI src) shift));
 9197   ins_cost(175);
 9198   format %{ "sarxl   $dst, $src, $shift" %}
 9199   ins_encode %{
 9200     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
 9201   %}
 9202   ins_pipe(ialu_reg_mem);
 9203 %}
 9204 
 9205 // Logical Shift Right by 8-bit immediate (32-bit register, shifted in place)
 9206 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
 9207 %{
 9208   match(Set dst (URShiftI dst shift));
 9209   effect(KILL cr); // the flags register is declared killed
 9210 
 9211   format %{ "shrl    $dst, $shift" %}
 9212   ins_encode %{
 9213     __ shrl($dst$$Register, $shift$$constant);
 9214   %}
 9215   ins_pipe(ialu_reg);
 9216 %}
 9217 
 9218 // Logical Shift Right by 8-bit immediate (32-bit value in memory; load, shift and store fused)
 9219 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9220 %{
 9221   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 9222   effect(KILL cr); // the flags register is declared killed
 9223 
 9224   format %{ "shrl    $dst, $shift" %}
 9225   ins_encode %{
 9226     __ shrl($dst$$Address, $shift$$constant);
 9227   %}
 9228   ins_pipe(ialu_mem_imm);
 9229 %}
 9230 
 9231 // Logical Shift Right by variable (32-bit register; count operand is constrained to the rcx class)
 9232 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9233 %{
 9234   predicate(!VM_Version::supports_bmi2()); // only used without BMI2; shrI_rReg_rReg covers the BMI2 case
 9235   match(Set dst (URShiftI dst shift));
 9236   effect(KILL cr); // the flags register is declared killed
 9237 
 9238   format %{ "shrl    $dst, $shift" %}
 9239   ins_encode %{
 9240     __ shrl($dst$$Register); // count is implicit: the shift operand is not passed to the assembler
 9241   %}
 9242   ins_pipe(ialu_reg_reg);
 9243 %}
 9244 
 9245 // Logical Shift Right by variable (32-bit value in memory; count operand is constrained to the rcx class)
 9246 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9247 %{
 9248   predicate(!VM_Version::supports_bmi2()); // only used when BMI2 is not available
 9249   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
 9250   effect(KILL cr); // the flags register is declared killed
 9251 
 9252   format %{ "shrl    $dst, $shift" %}
 9253   ins_encode %{
 9254     __ shrl($dst$$Address); // count is implicit: the shift operand is not passed to the assembler
 9255   %}
 9256   ins_pipe(ialu_mem_reg);
 9257 %}
 9258 
 9259 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
 9260 %{ // BMI2 variable logical shift right (32-bit): three-operand form, any register as count, no flags operand declared
 9261   predicate(VM_Version::supports_bmi2()); // BMI2 counterpart of shrI_rReg_CL
 9262   match(Set dst (URShiftI src shift));
 9263 
 9264   format %{ "shrxl   $dst, $src, $shift" %}
 9265   ins_encode %{
 9266     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
 9267   %}
 9268   ins_pipe(ialu_reg_reg);
 9269 %}
 9270 
 9271 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
 9272 %{ // BMI2 variable logical shift right (32-bit) with the LoadI of src folded in as a memory operand
 9273   predicate(VM_Version::supports_bmi2()); // requires BMI2
 9274   match(Set dst (URShiftI (LoadI src) shift));
 9275   ins_cost(175);
 9276   format %{ "shrxl   $dst, $src, $shift" %}
 9277   ins_encode %{
 9278     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
 9279   %}
 9280   ins_pipe(ialu_reg_mem);
 9281 %}
 9282 
 9283 // Long Shift Instructions
 9284 // Shift Left by 8-bit immediate (64-bit register, shifted in place)
 9285 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 9286 %{
 9287   match(Set dst (LShiftL dst shift));
 9288   effect(KILL cr); // the flags register is declared killed
 9289 
 9290   format %{ "salq    $dst, $shift" %}
 9291   ins_encode %{
 9292     __ salq($dst$$Register, $shift$$constant);
 9293   %}
 9294   ins_pipe(ialu_reg);
 9295 %}
 9296 
 9297 // Shift Left by 8-bit immediate (64-bit value in memory; load, shift and store fused)
 9298 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9299 %{
 9300   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 9301   effect(KILL cr); // the flags register is declared killed
 9302 
 9303   format %{ "salq    $dst, $shift" %}
 9304   ins_encode %{
 9305     __ salq($dst$$Address, $shift$$constant);
 9306   %}
 9307   ins_pipe(ialu_mem_imm);
 9308 %}
 9309 
 9310 // Shift Left by variable (64-bit register; count is a 32-bit operand constrained to the rcx class)
 9311 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9312 %{
 9313   predicate(!VM_Version::supports_bmi2()); // only used without BMI2; salL_rReg_rReg covers the BMI2 case
 9314   match(Set dst (LShiftL dst shift));
 9315   effect(KILL cr); // the flags register is declared killed
 9316 
 9317   format %{ "salq    $dst, $shift" %}
 9318   ins_encode %{
 9319     __ salq($dst$$Register); // count is implicit: the shift operand is not passed to the assembler
 9320   %}
 9321   ins_pipe(ialu_reg_reg);
 9322 %}
 9323 
 9324 // Shift Left by variable (64-bit value in memory; count is a 32-bit operand constrained to the rcx class)
 9325 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9326 %{
 9327   predicate(!VM_Version::supports_bmi2()); // only used when BMI2 is not available
 9328   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
 9329   effect(KILL cr); // the flags register is declared killed
 9330 
 9331   format %{ "salq    $dst, $shift" %}
 9332   ins_encode %{
 9333     __ salq($dst$$Address); // count is implicit: the shift operand is not passed to the assembler
 9334   %}
 9335   ins_pipe(ialu_mem_reg);
 9336 %}
 9337 
 9338 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9339 %{ // BMI2 variable shift left (64-bit): three-operand form, any register as count, no flags operand declared
 9340   predicate(VM_Version::supports_bmi2()); // BMI2 counterpart of salL_rReg_CL
 9341   match(Set dst (LShiftL src shift));
 9342 
 9343   format %{ "shlxq   $dst, $src, $shift" %}
 9344   ins_encode %{
 9345     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
 9346   %}
 9347   ins_pipe(ialu_reg_reg);
 9348 %}
 9349 
 9350 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9351 %{ // BMI2 variable shift left (64-bit) with the LoadL of src folded in as a memory operand
 9352   predicate(VM_Version::supports_bmi2()); // requires BMI2
 9353   match(Set dst (LShiftL (LoadL src) shift));
 9354   ins_cost(175);
 9355   format %{ "shlxq   $dst, $src, $shift" %}
 9356   ins_encode %{
 9357     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
 9358   %}
 9359   ins_pipe(ialu_reg_mem);
 9360 %}
 9361 
 9362 // Arithmetic Shift Right by immediate (64-bit register); accepts any int constant and uses its low 6 bits
 9363 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
 9364 %{
 9365   match(Set dst (RShiftL dst shift));
 9366   effect(KILL cr); // the flags register is declared killed
 9367 
 9368   format %{ "sarq    $dst, $shift" %}
 9369   ins_encode %{
 9370     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
 9371   %}
 9372   ins_pipe(ialu_reg); // register-only rule: same pipe class as salL_rReg_imm and shrL_rReg_imm
 9373 %}
 9374 
 9375 // Arithmetic Shift Right by immediate (64-bit value in memory); accepts any int constant and uses its low 6 bits
 9376 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
 9377 %{
 9378   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 9379   effect(KILL cr); // the flags register is declared killed
 9380 
 9381   format %{ "sarq    $dst, $shift" %}
 9382   ins_encode %{
 9383     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
 9384   %}
 9385   ins_pipe(ialu_mem_imm);
 9386 %}
 9387 
 9388 // Arithmetic Shift Right by variable (64-bit register; count is a 32-bit operand constrained to the rcx class)
 9389 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9390 %{
 9391   predicate(!VM_Version::supports_bmi2()); // only used without BMI2; sarL_rReg_rReg covers the BMI2 case
 9392   match(Set dst (RShiftL dst shift));
 9393   effect(KILL cr); // the flags register is declared killed
 9394 
 9395   format %{ "sarq    $dst, $shift" %}
 9396   ins_encode %{
 9397     __ sarq($dst$$Register); // count is implicit: the shift operand is not passed to the assembler
 9398   %}
 9399   ins_pipe(ialu_reg_reg);
 9400 %}
 9401 
 9402 // Arithmetic Shift Right by variable (64-bit value in memory; count is a 32-bit operand constrained to the rcx class)
 9403 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9404 %{
 9405   predicate(!VM_Version::supports_bmi2()); // only used when BMI2 is not available
 9406   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
 9407   effect(KILL cr); // the flags register is declared killed
 9408 
 9409   format %{ "sarq    $dst, $shift" %}
 9410   ins_encode %{
 9411     __ sarq($dst$$Address); // count is implicit: the shift operand is not passed to the assembler
 9412   %}
 9413   ins_pipe(ialu_mem_reg);
 9414 %}
 9415 
 9416 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9417 %{ // BMI2 variable arithmetic shift right (64-bit): three-operand form, any register as count, no flags operand declared
 9418   predicate(VM_Version::supports_bmi2()); // BMI2 counterpart of sarL_rReg_CL
 9419   match(Set dst (RShiftL src shift));
 9420 
 9421   format %{ "sarxq   $dst, $src, $shift" %}
 9422   ins_encode %{
 9423     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
 9424   %}
 9425   ins_pipe(ialu_reg_reg);
 9426 %}
 9427 
 9428 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9429 %{ // BMI2 variable arithmetic shift right (64-bit) with the LoadL of src folded in as a memory operand
 9430   predicate(VM_Version::supports_bmi2()); // requires BMI2
 9431   match(Set dst (RShiftL (LoadL src) shift));
 9432   ins_cost(175);
 9433   format %{ "sarxq   $dst, $src, $shift" %}
 9434   ins_encode %{
 9435     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
 9436   %}
 9437   ins_pipe(ialu_reg_mem);
 9438 %}
 9439 
 9440 // Logical Shift Right by 8-bit immediate (64-bit register, shifted in place)
 9441 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
 9442 %{
 9443   match(Set dst (URShiftL dst shift));
 9444   effect(KILL cr); // the flags register is declared killed
 9445 
 9446   format %{ "shrq    $dst, $shift" %}
 9447   ins_encode %{
 9448     __ shrq($dst$$Register, $shift$$constant);
 9449   %}
 9450   ins_pipe(ialu_reg);
 9451 %}
 9452 
 9453 // Logical Shift Right by 8-bit immediate (64-bit value in memory; load, shift and store fused)
 9454 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
 9455 %{
 9456   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 9457   effect(KILL cr); // the flags register is declared killed
 9458 
 9459   format %{ "shrq    $dst, $shift" %}
 9460   ins_encode %{
 9461     __ shrq($dst$$Address, $shift$$constant);
 9462   %}
 9463   ins_pipe(ialu_mem_imm);
 9464 %}
 9465 
 9466 // Logical Shift Right by variable (64-bit register; count is a 32-bit operand constrained to the rcx class)
 9467 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9468 %{
 9469   predicate(!VM_Version::supports_bmi2()); // only used without BMI2; shrL_rReg_rReg covers the BMI2 case
 9470   match(Set dst (URShiftL dst shift));
 9471   effect(KILL cr); // the flags register is declared killed
 9472 
 9473   format %{ "shrq    $dst, $shift" %}
 9474   ins_encode %{
 9475     __ shrq($dst$$Register); // count is implicit: the shift operand is not passed to the assembler
 9476   %}
 9477   ins_pipe(ialu_reg_reg);
 9478 %}
 9479 
 9480 // Logical Shift Right by variable (64-bit value in memory; count is a 32-bit operand constrained to the rcx class)
 9481 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
 9482 %{
 9483   predicate(!VM_Version::supports_bmi2()); // only used when BMI2 is not available
 9484   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
 9485   effect(KILL cr); // the flags register is declared killed
 9486 
 9487   format %{ "shrq    $dst, $shift" %}
 9488   ins_encode %{
 9489     __ shrq($dst$$Address); // count is implicit: the shift operand is not passed to the assembler
 9490   %}
 9491   ins_pipe(ialu_mem_reg);
 9492 %}
 9493 
 9494 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
 9495 %{ // BMI2 variable logical shift right (64-bit): three-operand form, any register as count, no flags operand declared
 9496   predicate(VM_Version::supports_bmi2()); // BMI2 counterpart of shrL_rReg_CL
 9497   match(Set dst (URShiftL src shift));
 9498 
 9499   format %{ "shrxq   $dst, $src, $shift" %}
 9500   ins_encode %{
 9501     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
 9502   %}
 9503   ins_pipe(ialu_reg_reg);
 9504 %}
 9505 
 9506 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
 9507 %{ // BMI2 variable logical shift right (64-bit) with the LoadL of src folded in as a memory operand
 9508   predicate(VM_Version::supports_bmi2()); // requires BMI2
 9509   match(Set dst (URShiftL (LoadL src) shift));
 9510   ins_cost(175);
 9511   format %{ "shrxq   $dst, $src, $shift" %}
 9512   ins_encode %{
 9513     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
 9514   %}
 9515   ins_pipe(ialu_reg_mem);
 9516 %}
 9517 
 9518 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 9519 // This idiom is used by the compiler for the i2b bytecode.
 9520 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
 9521 %{ // The shift pair is replaced by a single sign-extending byte move; no flags operand is declared
 9522   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 9523 
 9524   format %{ "movsbl  $dst, $src\t# i2b" %}
 9525   ins_encode %{
 9526     __ movsbl($dst$$Register, $src$$Register);
 9527   %}
 9528   ins_pipe(ialu_reg_reg);
 9529 %}
 9530 
 9531 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 9532 // This idiom is used by the compiler for the i2s bytecode.
 9533 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
 9534 %{ // The shift pair is replaced by a single sign-extending 16-bit move; no flags operand is declared
 9535   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 9536 
 9537   format %{ "movswl  $dst, $src\t# i2s" %}
 9538   ins_encode %{
 9539     __ movswl($dst$$Register, $src$$Register);
 9540   %}
 9541   ins_pipe(ialu_reg_reg);
 9542 %}
 9543 
 9544 // ROL/ROR instructions
 9545 
 9546 // Rotate left by constant (32-bit, in place); the form used when BMI2 is not available.
 9547 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
 9548 %{
 9549   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT); // RotateLeft is matched for int and long; this rule takes int only
 9550   match(Set dst (RotateLeft dst shift));
 9551   effect(KILL cr); // the flags register is declared killed
 9552   format %{ "roll    $dst, $shift" %}
 9553   ins_encode %{
 9554     __ roll($dst$$Register, $shift$$constant);
 9555   %}
 9556   ins_pipe(ialu_reg);
 9557 %}
 9558 
 9559 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
 9560 %{ // BMI2 rotate left by constant (32-bit), emitted as a rotate right by the complementary count; no flags operand declared
 9561   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT); // int results only
 9562   match(Set dst (RotateLeft src shift));
 9563   format %{ "rolxl   $dst, $src, $shift" %}
 9564   ins_encode %{
 9565     int shift = 32 - ($shift$$constant & 31); // left by k equals right by 32 - k; yields 32 when k is a multiple of 32
 9566     __ rorxl($dst$$Register, $src$$Register, shift);
 9567   %}
 9568   ins_pipe(ialu_reg_reg);
 9569 %}
 9570 
 9571 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
 9572 %{ // BMI2 rotate left by constant (32-bit) with the LoadI of src folded in; emitted as a rotate right
 9573   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT); // int results only
 9574   match(Set dst (RotateLeft (LoadI src) shift));
 9575   ins_cost(175);
 9576   format %{ "rolxl   $dst, $src, $shift" %}
 9577   ins_encode %{
 9578     int shift = 32 - ($shift$$constant & 31); // left by k equals right by 32 - k; yields 32 when k is a multiple of 32
 9579     __ rorxl($dst$$Register, $src$$Address, shift);
 9580   %}
 9581   ins_pipe(ialu_reg_mem);
 9582 %}
 9583 
 9584 // Rotate Left by variable (32-bit, in place; count operand is constrained to the rcx class)
 9585 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9586 %{
 9587   predicate(n->bottom_type()->basic_type() == T_INT); // int results only; there is no BMI2 condition on this rule
 9588   match(Set dst (RotateLeft dst shift));
 9589   effect(KILL cr); // the flags register is declared killed
 9590   format %{ "roll    $dst, $shift" %}
 9591   ins_encode %{
 9592     __ roll($dst$$Register); // count is implicit: the shift operand is not passed to the assembler
 9593   %}
 9594   ins_pipe(ialu_reg_reg);
 9595 %}
 9596 
 9597 // Rotate Right by constant (32-bit, in place); the form used when BMI2 is not available.
 9598 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
 9599 %{
 9600   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT); // int results only
 9601   match(Set dst (RotateRight dst shift));
 9602   effect(KILL cr); // the flags register is declared killed
 9603   format %{ "rorl    $dst, $shift" %}
 9604   ins_encode %{
 9605     __ rorl($dst$$Register, $shift$$constant);
 9606   %}
 9607   ins_pipe(ialu_reg);
 9608 %}
 9609 
 9610 // Rotate Right by constant (int, BMI2): rorxl with separate dst and src; no flags effect is declared.
 9611 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
 9612 %{
 9613   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9614   match(Set dst (RotateRight src shift));
 9615   format %{ "rorxl   $dst, $src, $shift" %}
 9616   ins_encode %{
 9617     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
 9618   %}
 9619   ins_pipe(ialu_reg_reg);
 9620 %}
 9621 
 9622 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
 9623 %{
 9624   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
 9625   match(Set dst (RotateRight (LoadI src) shift)); // int rotate-right by constant with the source loaded from memory (BMI2 path)
 9626   ins_cost(175);
 9627   format %{ "rorxl   $dst, $src, $shift" %}
 9628   ins_encode %{
 9629     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
 9630   %}
 9631   ins_pipe(ialu_reg_mem);
 9632 %}
 9633 
 9634 // Rotate Right by variable (int): in-place on dst, count operand restricted to the rcx_RegI class, flags declared killed.
 9635 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
 9636 %{
 9637   predicate(n->bottom_type()->basic_type() == T_INT);
 9638   match(Set dst (RotateRight dst shift));
 9639   effect(KILL cr);
 9640   format %{ "rorl    $dst, $shift" %}
 9641   ins_encode %{
 9642     __ rorl($dst$$Register); // count is not passed explicitly; presumably taken from CL - confirm
 9643   %}
 9644   ins_pipe(ialu_reg_reg);
 9645 %}
 9646 
 9647 // Rotate Left by constant (long, selected when BMI2 is not supported): in-place rolq on dst, flags declared killed.
 9648 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
 9649 %{
 9650   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9651   match(Set dst (RotateLeft dst shift));
 9652   effect(KILL cr);
 9653   format %{ "rolq    $dst, $shift" %}
 9654   ins_encode %{
 9655     __ rolq($dst$$Register, $shift$$constant);
 9656   %}
 9657   ins_pipe(ialu_reg);
 9658 %}
 9659 
 9660 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
 9661 %{
 9662   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9663   match(Set dst (RotateLeft src shift)); // long rotate-left by constant, BMI2 path; separate dst/src, no flags effect declared
 9664   format %{ "rolxq   $dst, $src, $shift" %}
 9665   ins_encode %{
 9666     int shift = 64 - ($shift$$constant & 63); // left-rotate by k is emitted as right-rotate by 64 - (k mod 64)
 9667     __ rorxq($dst$$Register, $src$$Register, shift); // NOTE(review): a masked count of 0 passes 64 here; presumably reduced mod 64 downstream - confirm
 9668   %}
 9669   ins_pipe(ialu_reg_reg);
 9670 %}
 9671 
 9672 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
 9673 %{
 9674   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9675   match(Set dst (RotateLeft (LoadL src) shift)); // long rotate-left by constant with the source loaded from memory (BMI2 path)
 9676   ins_cost(175);
 9677   format %{ "rolxq   $dst, $src, $shift" %}
 9678   ins_encode %{
 9679     int shift = 64 - ($shift$$constant & 63); // left-rotate by k is emitted as right-rotate by 64 - (k mod 64)
 9680     __ rorxq($dst$$Register, $src$$Address, shift);
 9681   %}
 9682   ins_pipe(ialu_reg_mem);
 9683 %}
 9684 
 9685 // Rotate Left by variable (long): in-place on dst, count operand restricted to the rcx_RegI class, flags declared killed.
 9686 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9687 %{
 9688   predicate(n->bottom_type()->basic_type() == T_LONG);
 9689   match(Set dst (RotateLeft dst shift));
 9690   effect(KILL cr);
 9691   format %{ "rolq    $dst, $shift" %}
 9692   ins_encode %{
 9693     __ rolq($dst$$Register); // count is not passed explicitly; presumably taken from CL - confirm
 9694   %}
 9695   ins_pipe(ialu_reg_reg);
 9696 %}
 9697 
 9698 // Rotate Right by constant (long, selected when BMI2 is not supported): in-place rorq on dst, flags declared killed.
 9699 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
 9700 %{
 9701   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9702   match(Set dst (RotateRight dst shift));
 9703   effect(KILL cr);
 9704   format %{ "rorq    $dst, $shift" %}
 9705   ins_encode %{
 9706     __ rorq($dst$$Register, $shift$$constant);
 9707   %}
 9708   ins_pipe(ialu_reg);
 9709 %}
 9710 
 9711 // Rotate Right by constant (long, BMI2): rorxq with separate dst and src; no flags effect is declared.
 9712 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
 9713 %{
 9714   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9715   match(Set dst (RotateRight src shift));
 9716   format %{ "rorxq   $dst, $src, $shift" %}
 9717   ins_encode %{
 9718     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
 9719   %}
 9720   ins_pipe(ialu_reg_reg);
 9721 %}
 9722 
 9723 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
 9724 %{
 9725   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
 9726   match(Set dst (RotateRight (LoadL src) shift)); // long rotate-right by constant with the source loaded from memory (BMI2 path)
 9727   ins_cost(175);
 9728   format %{ "rorxq   $dst, $src, $shift" %}
 9729   ins_encode %{
 9730     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
 9731   %}
 9732   ins_pipe(ialu_reg_mem);
 9733 %}
 9734 
 9735 // Rotate Right by variable (long): in-place on dst, count operand restricted to the rcx_RegI class, flags declared killed.
 9736 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
 9737 %{
 9738   predicate(n->bottom_type()->basic_type() == T_LONG);
 9739   match(Set dst (RotateRight dst shift));
 9740   effect(KILL cr);
 9741   format %{ "rorq    $dst, $shift" %}
 9742   ins_encode %{
 9743     __ rorq($dst$$Register); // count is not passed explicitly; presumably taken from CL - confirm
 9744   %}
 9745   ins_pipe(ialu_reg_reg);
 9746 %}
 9747 
 9748 //----------------------------- CompressBits/ExpandBits ------------------------
 9749 
 9750 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
 9751   predicate(n->bottom_type()->isa_long()); // only long-typed nodes are handled by this rule
 9752   match(Set dst (CompressBits src mask)); // CompressBits with a register mask, emitted as a single pextq
 9753   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
 9754   ins_encode %{
 9755     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
 9756   %}
 9757   ins_pipe( pipe_slow );
 9758 %}
 9759 
 9760 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
 9761   predicate(n->bottom_type()->isa_long()); // only long-typed nodes are handled by this rule
 9762   match(Set dst (ExpandBits src mask)); // ExpandBits with a register mask, emitted as a single pdepq
 9763   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
 9764   ins_encode %{
 9765     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
 9766   %}
 9767   ins_pipe( pipe_slow );
 9768 %}
 9769 
 9770 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
 9771   predicate(n->bottom_type()->isa_long()); // only long-typed nodes are handled by this rule
 9772   match(Set dst (CompressBits src (LoadL mask))); // CompressBits with the mask loaded from memory, folded into pextq
 9773   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
 9774   ins_encode %{
 9775     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
 9776   %}
 9777   ins_pipe( pipe_slow );
 9778 %}
 9779 
 9780 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
 9781   predicate(n->bottom_type()->isa_long()); // only long-typed nodes are handled by this rule
 9782   match(Set dst (ExpandBits src (LoadL mask))); // ExpandBits with the mask loaded from memory, folded into pdepq
 9783   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
 9784   ins_encode %{
 9785     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
 9786   %}
 9787   ins_pipe( pipe_slow );
 9788 %}
 9789 
 9790 
 9791 // Logical Instructions
 9792 
 9793 // Integer Logical Instructions
 9794 
 9795 // And Instructions
 9796 // And Register with Register (int): dst is both an input and the result; flags declared killed.
 9797 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9798 %{
 9799   match(Set dst (AndI dst src));
 9800   effect(KILL cr);
 9801 
 9802   format %{ "andl    $dst, $src\t# int" %}
 9803   ins_encode %{
 9804     __ andl($dst$$Register, $src$$Register);
 9805   %}
 9806   ins_pipe(ialu_reg_reg);
 9807 %}
 9808 
 9809 // And Register with Immediate 255: emitted as a movzbl from src into dst; no flags operand is declared.
 9810 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
 9811 %{
 9812   match(Set dst (AndI src mask));
 9813 
 9814   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
 9815   ins_encode %{
 9816     __ movzbl($dst$$Register, $src$$Register);
 9817   %}
 9818   ins_pipe(ialu_reg);
 9819 %}
 9820 
 9821 // And Register with Immediate 255 and promote to long: the single movzbl covers both the mask and the ConvI2L.
 9822 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
 9823 %{
 9824   match(Set dst (ConvI2L (AndI src mask)));
 9825 
 9826   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
 9827   ins_encode %{
 9828     __ movzbl($dst$$Register, $src$$Register);
 9829   %}
 9830   ins_pipe(ialu_reg);
 9831 %}
 9832 
 9833 // And Register with Immediate 65535: emitted as a movzwl from src into dst; no flags operand is declared.
 9834 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
 9835 %{
 9836   match(Set dst (AndI src mask));
 9837 
 9838   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
 9839   ins_encode %{
 9840     __ movzwl($dst$$Register, $src$$Register);
 9841   %}
 9842   ins_pipe(ialu_reg);
 9843 %}
 9844 
 9845 // And Register with Immediate 65535 and promote to long: the single movzwl covers both the mask and the ConvI2L.
 9846 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
 9847 %{
 9848   match(Set dst (ConvI2L (AndI src mask)));
 9849 
 9850   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
 9851   ins_encode %{
 9852     __ movzwl($dst$$Register, $src$$Register);
 9853   %}
 9854   ins_pipe(ialu_reg);
 9855 %}
 9856 
 9857 // Can skip int2long conversions after AND with small bitmask (BMI2 only): the mask is of the immI_Pow2M1 class and is applied with bzhiq.
 9858 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
 9859 %{
 9860   predicate(VM_Version::supports_bmi2());
 9861   ins_cost(125);
 9862   effect(TEMP tmp, KILL cr); // tmp is a scratch register for the bit index; flags declared killed
 9863   match(Set dst (ConvI2L (AndI src mask)));
 9864   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
 9865   ins_encode %{
 9866     __ movl($tmp$$Register, exact_log2($mask$$constant + 1)); // bit index = log2(mask + 1), i.e. the number of low bits kept
 9867     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
 9868   %}
 9869   ins_pipe(ialu_reg_reg);
 9870 %}
 9871 
 9872 // And Register with Immediate (int): dst is both an input and the result; flags declared killed.
 9873 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9874 %{
 9875   match(Set dst (AndI dst src));
 9876   effect(KILL cr);
 9877 
 9878   format %{ "andl    $dst, $src\t# int" %}
 9879   ins_encode %{
 9880     __ andl($dst$$Register, $src$$constant);
 9881   %}
 9882   ins_pipe(ialu_reg);
 9883 %}
 9884 
 9885 // And Register with Memory (int): the LoadI is folded into the andl memory operand; flags declared killed.
 9886 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9887 %{
 9888   match(Set dst (AndI dst (LoadI src)));
 9889   effect(KILL cr);
 9890 
 9891   ins_cost(150);
 9892   format %{ "andl    $dst, $src\t# int" %}
 9893   ins_encode %{
 9894     __ andl($dst$$Register, $src$$Address);
 9895   %}
 9896   ins_pipe(ialu_reg_mem);
 9897 %}
 9898 
 9899 // And Memory with Register (byte): load, AND and store on the same memory operand, emitted as one andb.
 9900 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9901 %{
 9902   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
 9903   effect(KILL cr);
 9904 
 9905   ins_cost(150);
 9906   format %{ "andb    $dst, $src\t# byte" %}
 9907   ins_encode %{
 9908     __ andb($dst$$Address, $src$$Register);
 9909   %}
 9910   ins_pipe(ialu_mem_reg);
 9911 %}
 9912 
 9913 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9914 %{
 9915   match(Set dst (StoreI dst (AndI (LoadI dst) src))); // int load, AND and store on the same memory operand, emitted as one andl
 9916   effect(KILL cr);
 9917 
 9918   ins_cost(150);
 9919   format %{ "andl    $dst, $src\t# int" %}
 9920   ins_encode %{
 9921     __ andl($dst$$Address, $src$$Register);
 9922   %}
 9923   ins_pipe(ialu_mem_reg);
 9924 %}
 9925 
 9926 // And Memory with Immediate (int): load, AND and store on the same memory operand, emitted as one andl with a constant.
 9927 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9928 %{
 9929   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 9930   effect(KILL cr);
 9931 
 9932   ins_cost(125);
 9933   format %{ "andl    $dst, $src\t# int" %}
 9934   ins_encode %{
 9935     __ andl($dst$$Address, $src$$constant);
 9936   %}
 9937   ins_pipe(ialu_mem_imm);
 9938 %}
 9939 
 9940 // BMI1 instructions
 9941 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
 9942   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2))); // (src1 ^ -1) & [src2], i.e. and-not with a memory operand
 9943   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
 9944   effect(KILL cr);
 9945 
 9946   ins_cost(125);
 9947   format %{ "andnl  $dst, $src1, $src2" %}
 9948 
 9949   ins_encode %{
 9950     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 9951   %}
 9952   ins_pipe(ialu_reg_mem);
 9953 %}
 9954 
 9955 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
 9956   match(Set dst (AndI (XorI src1 minus_1) src2)); // (src1 ^ -1) & src2, i.e. and-not on registers
 9957   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
 9958   effect(KILL cr);
 9959 
 9960   format %{ "andnl  $dst, $src1, $src2" %}
 9961 
 9962   ins_encode %{
 9963     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 9964   %}
 9965   ins_pipe(ialu_reg);
 9966 %}
 9967 
 9968 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
 9969   match(Set dst (AndI (SubI imm_zero src) src)); // (0 - src) & src, emitted as a single blsil
 9970   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
 9971   effect(KILL cr);
 9972 
 9973   format %{ "blsil  $dst, $src" %}
 9974 
 9975   ins_encode %{
 9976     __ blsil($dst$$Register, $src$$Register);
 9977   %}
 9978   ins_pipe(ialu_reg);
 9979 %}
 9980 
 9981 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
 9982   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); // (0 - [src]) & [src]; both loads name the same memory operand
 9983   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
 9984   effect(KILL cr);
 9985 
 9986   ins_cost(125);
 9987   format %{ "blsil  $dst, $src" %}
 9988 
 9989   ins_encode %{
 9990     __ blsil($dst$$Register, $src$$Address);
 9991   %}
 9992   ins_pipe(ialu_reg_mem);
 9993 %}
 9994 
 9995 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
 9996 %{
 9997   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) ); // ([src] + -1) ^ [src]; both loads name the same memory operand
 9998   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
 9999   effect(KILL cr);
10000 
10001   ins_cost(125);
10002   format %{ "blsmskl $dst, $src" %}
10003 
10004   ins_encode %{
10005     __ blsmskl($dst$$Register, $src$$Address);
10006   %}
10007   ins_pipe(ialu_reg_mem);
10008 %}
10009 
10010 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
10011 %{
10012   match(Set dst (XorI (AddI src minus_1) src)); // (src + -1) ^ src, emitted as a single blsmskl
10013   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10014   effect(KILL cr);
10015 
10016   format %{ "blsmskl $dst, $src" %}
10017 
10018   ins_encode %{
10019     __ blsmskl($dst$$Register, $src$$Register);
10020   %}
10021 
10022   ins_pipe(ialu_reg);
10023 %}
10024 
10025 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
10026 %{
10027   match(Set dst (AndI (AddI src minus_1) src) ); // (src + -1) & src, emitted as a single blsrl
10028   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10029   effect(KILL cr);
10030 
10031   format %{ "blsrl  $dst, $src" %}
10032 
10033   ins_encode %{
10034     __ blsrl($dst$$Register, $src$$Register);
10035   %}
10036 
10037   ins_pipe(ialu_reg); // register-only form: same pipe class as blsiI_rReg_rReg and blsmskI_rReg_rReg (was ialu_reg_mem)
10038 %}
10039 
10040 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
10041 %{
10042   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) ); // ([src] + -1) & [src]; both loads name the same memory operand
10043   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10044   effect(KILL cr);
10045 
10046   ins_cost(125);
10047   format %{ "blsrl  $dst, $src" %}
10048 
10049   ins_encode %{
10050     __ blsrl($dst$$Register, $src$$Address);
10051   %}
10052 
10053   ins_pipe(ialu_reg_mem); // memory-source form: same pipe class as blsiI_rReg_mem and blsmskI_rReg_mem (was ialu_reg)
10054 %}
10055 
10056 // Or Instructions
10057 // Or Register with Register (int): dst is both an input and the result; flags declared killed.
10058 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10059 %{
10060   match(Set dst (OrI dst src));
10061   effect(KILL cr);
10062 
10063   format %{ "orl     $dst, $src\t# int" %}
10064   ins_encode %{
10065     __ orl($dst$$Register, $src$$Register);
10066   %}
10067   ins_pipe(ialu_reg_reg);
10068 %}
10069 
10070 // Or Register with Immediate (int): dst is both an input and the result; flags declared killed.
10071 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10072 %{
10073   match(Set dst (OrI dst src));
10074   effect(KILL cr);
10075 
10076   format %{ "orl     $dst, $src\t# int" %}
10077   ins_encode %{
10078     __ orl($dst$$Register, $src$$constant);
10079   %}
10080   ins_pipe(ialu_reg);
10081 %}
10082 
10083 // Or Register with Memory (int): the LoadI is folded into the orl memory operand; flags declared killed.
10084 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10085 %{
10086   match(Set dst (OrI dst (LoadI src)));
10087   effect(KILL cr);
10088 
10089   ins_cost(150);
10090   format %{ "orl     $dst, $src\t# int" %}
10091   ins_encode %{
10092     __ orl($dst$$Register, $src$$Address);
10093   %}
10094   ins_pipe(ialu_reg_mem);
10095 %}
10096 
10097 // Or Memory with Register (byte): load, OR and store on the same memory operand, emitted as one orb.
10098 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10099 %{
10100   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
10101   effect(KILL cr);
10102 
10103   ins_cost(150);
10104   format %{ "orb    $dst, $src\t# byte" %}
10105   ins_encode %{
10106     __ orb($dst$$Address, $src$$Register);
10107   %}
10108   ins_pipe(ialu_mem_reg);
10109 %}
10110 
10111 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10112 %{
10113   match(Set dst (StoreI dst (OrI (LoadI dst) src))); // int load, OR and store on the same memory operand, emitted as one orl
10114   effect(KILL cr);
10115 
10116   ins_cost(150);
10117   format %{ "orl     $dst, $src\t# int" %}
10118   ins_encode %{
10119     __ orl($dst$$Address, $src$$Register);
10120   %}
10121   ins_pipe(ialu_mem_reg);
10122 %}
10123 
10124 // Or Memory with Immediate (int): load, OR and store on the same memory operand, emitted as one orl with a constant.
10125 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
10126 %{
10127   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
10128   effect(KILL cr);
10129 
10130   ins_cost(125);
10131   format %{ "orl     $dst, $src\t# int" %}
10132   ins_encode %{
10133     __ orl($dst$$Address, $src$$constant);
10134   %}
10135   ins_pipe(ialu_mem_imm);
10136 %}
10137 
10138 // Xor Instructions
10139 // Xor Register with Register (int): dst is both an input and the result; flags declared killed.
10140 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10141 %{
10142   match(Set dst (XorI dst src));
10143   effect(KILL cr);
10144 
10145   format %{ "xorl    $dst, $src\t# int" %}
10146   ins_encode %{
10147     __ xorl($dst$$Register, $src$$Register);
10148   %}
10149   ins_pipe(ialu_reg_reg);
10150 %}
10151 
10152 // Xor Register with Immediate -1 (int): emitted as notl on dst; no flags operand is declared.
10153 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
10154   match(Set dst (XorI dst imm));
10155 
10156   format %{ "notl   $dst" %}
10157   ins_encode %{
10158      __ notl($dst$$Register); // the printed mnemonic above now names the 32-bit form emitted here, mirroring notq in xorL_rReg_im1
10159   %}
10160   ins_pipe(ialu_reg);
10161 %}
10162 
10163 // Xor Register with Immediate (int): dst is both an input and the result; flags declared killed.
10164 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
10165 %{
10166   match(Set dst (XorI dst src));
10167   effect(KILL cr);
10168 
10169   format %{ "xorl    $dst, $src\t# int" %}
10170   ins_encode %{
10171     __ xorl($dst$$Register, $src$$constant);
10172   %}
10173   ins_pipe(ialu_reg);
10174 %}
10175 
10176 // Xor Register with Memory (int): the LoadI is folded into the xorl memory operand; flags declared killed.
10177 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10178 %{
10179   match(Set dst (XorI dst (LoadI src)));
10180   effect(KILL cr);
10181 
10182   ins_cost(150);
10183   format %{ "xorl    $dst, $src\t# int" %}
10184   ins_encode %{
10185     __ xorl($dst$$Register, $src$$Address);
10186   %}
10187   ins_pipe(ialu_reg_mem);
10188 %}
10189 
10190 // Xor Memory with Register (byte): load, XOR and store on the same memory operand, emitted as one xorb.
10191 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10192 %{
10193   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
10194   effect(KILL cr);
10195 
10196   ins_cost(150);
10197   format %{ "xorb    $dst, $src\t# byte" %}
10198   ins_encode %{
10199     __ xorb($dst$$Address, $src$$Register);
10200   %}
10201   ins_pipe(ialu_mem_reg);
10202 %}
10203 
10204 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10205 %{
10206   match(Set dst (StoreI dst (XorI (LoadI dst) src))); // int load, XOR and store on the same memory operand, emitted as one xorl
10207   effect(KILL cr);
10208 
10209   ins_cost(150);
10210   format %{ "xorl    $dst, $src\t# int" %}
10211   ins_encode %{
10212     __ xorl($dst$$Address, $src$$Register);
10213   %}
10214   ins_pipe(ialu_mem_reg);
10215 %}
10216 
10217 // Xor Memory with Immediate (int): load, XOR and store on the same memory operand, emitted as one xorl with a constant.
10218 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
10219 %{
10220   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
10221   effect(KILL cr);
10222 
10223   ins_cost(125);
10224   format %{ "xorl    $dst, $src\t# int" %}
10225   ins_encode %{
10226     __ xorl($dst$$Address, $src$$constant);
10227   %}
10228   ins_pipe(ialu_mem_imm);
10229 %}
10230 
10231 
10232 // Long Logical Instructions
10233 
10234 // And Instructions
10235 // And Register with Register (long): dst is both an input and the result; flags declared killed.
10236 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10237 %{
10238   match(Set dst (AndL dst src));
10239   effect(KILL cr);
10240 
10241   format %{ "andq    $dst, $src\t# long" %}
10242   ins_encode %{
10243     __ andq($dst$$Register, $src$$Register);
10244   %}
10245   ins_pipe(ialu_reg_reg);
10246 %}
10247 
10248 // And Register with Immediate 255 (long): emitted as a 32-bit movzbl; no flags operand is declared.
10249 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
10250 %{
10251   match(Set dst (AndL src mask));
10252 
10253   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
10254   ins_encode %{
10255     // movzbl zeroes out the upper 32-bit and does not need REX.W
10256     __ movzbl($dst$$Register, $src$$Register);
10257   %}
10258   ins_pipe(ialu_reg);
10259 %}
10260 
10261 // And Register with Immediate 65535 (long): emitted as a 32-bit movzwl; no flags operand is declared.
10262 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
10263 %{
10264   match(Set dst (AndL src mask));
10265 
10266   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
10267   ins_encode %{
10268     // movzwl zeroes out the upper 32-bit and does not need REX.W
10269     __ movzwl($dst$$Register, $src$$Register);
10270   %}
10271   ins_pipe(ialu_reg);
10272 %}
10273 
10274 // And Register with Immediate (long): the constant must be of the immL32 operand class; flags declared killed.
10275 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10276 %{
10277   match(Set dst (AndL dst src));
10278   effect(KILL cr);
10279 
10280   format %{ "andq    $dst, $src\t# long" %}
10281   ins_encode %{
10282     __ andq($dst$$Register, $src$$constant);
10283   %}
10284   ins_pipe(ialu_reg);
10285 %}
10286 
10287 // And Register with Memory (long): the LoadL is folded into the andq memory operand; flags declared killed.
10288 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10289 %{
10290   match(Set dst (AndL dst (LoadL src)));
10291   effect(KILL cr);
10292 
10293   ins_cost(150);
10294   format %{ "andq    $dst, $src\t# long" %}
10295   ins_encode %{
10296     __ andq($dst$$Register, $src$$Address);
10297   %}
10298   ins_pipe(ialu_reg_mem);
10299 %}
10300 
10301 // And Memory with Register (long): load, AND and store on the same memory operand, emitted as one andq.
10302 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10303 %{
10304   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10305   effect(KILL cr);
10306 
10307   ins_cost(150);
10308   format %{ "andq    $dst, $src\t# long" %}
10309   ins_encode %{
10310     __ andq($dst$$Address, $src$$Register);
10311   %}
10312   ins_pipe(ialu_mem_reg);
10313 %}
10314 
10315 // And Memory with Immediate (long): load, AND and store on the same memory operand; the constant is of the immL32 class.
10316 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10317 %{
10318   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
10319   effect(KILL cr);
10320 
10321   ins_cost(125);
10322   format %{ "andq    $dst, $src\t# long" %}
10323   ins_encode %{
10324     __ andq($dst$$Address, $src$$constant);
10325   %}
10326   ins_pipe(ialu_mem_imm);
10327 %}
10328 
10329 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
10330 %{
10331   // con should be a pure 64-bit immediate given that not(con) is a power of 2
10332   // because AND/OR works well enough for 8/32-bit values.
10333   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30); // NOTE(review): threshold is 30 here but 31 in btsL_mem_imm; presumably because the complement of bit 31 does not fit a sign-extended 32-bit immediate - confirm
10334 
10335   match(Set dst (StoreL dst (AndL (LoadL dst) con))); // clears one bit of a long in memory: AND with a constant whose complement is a power of 2
10336   effect(KILL cr);
10337 
10338   ins_cost(125);
10339   format %{ "btrq    $dst, log2(not($con))\t# long" %}
10340   ins_encode %{
10341     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant)); // bit index is log2 of the complemented constant
10342   %}
10343   ins_pipe(ialu_mem_imm);
10344 %}
10345 
10346 // BMI1 instructions
10347 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
10348   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2))); // (src1 ^ -1) & [src2], i.e. and-not with a memory operand
10349   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10350   effect(KILL cr);
10351 
10352   ins_cost(125);
10353   format %{ "andnq  $dst, $src1, $src2" %}
10354 
10355   ins_encode %{
10356     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
10357   %}
10358   ins_pipe(ialu_reg_mem);
10359 %}
10360 
10361 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
10362   match(Set dst (AndL (XorL src1 minus_1) src2)); // (src1 ^ -1) & src2, i.e. and-not on registers
10363   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10364   effect(KILL cr);
10365 
10366   format %{ "andnq  $dst, $src1, $src2" %}
10367 
10368   ins_encode %{
10369     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
10370   %}
10371   ins_pipe(ialu_reg); // register-only form: same pipe class as andnI_rReg_rReg_rReg (was ialu_reg_mem)
10372 %}
10373 
10374 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
10375   match(Set dst (AndL (SubL imm_zero src) src)); // (0 - src) & src, emitted as a single blsiq
10376   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10377   effect(KILL cr);
10378 
10379   format %{ "blsiq  $dst, $src" %}
10380 
10381   ins_encode %{
10382     __ blsiq($dst$$Register, $src$$Register);
10383   %}
10384   ins_pipe(ialu_reg);
10385 %}
10386 
10387 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
10388   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); // (0 - [src]) & [src]; both loads name the same memory operand
10389   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10390   effect(KILL cr);
10391 
10392   ins_cost(125);
10393   format %{ "blsiq  $dst, $src" %}
10394 
10395   ins_encode %{
10396     __ blsiq($dst$$Register, $src$$Address);
10397   %}
10398   ins_pipe(ialu_reg_mem);
10399 %}
10400 
10401 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
10402 %{
10403   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) ); // ([src] + -1) ^ [src]; both loads name the same memory operand
10404   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10405   effect(KILL cr);
10406 
10407   ins_cost(125);
10408   format %{ "blsmskq $dst, $src" %}
10409 
10410   ins_encode %{
10411     __ blsmskq($dst$$Register, $src$$Address);
10412   %}
10413   ins_pipe(ialu_reg_mem);
10414 %}
10415 
10416 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
10417 %{
10418   match(Set dst (XorL (AddL src minus_1) src)); // (src + -1) ^ src, emitted as a single blsmskq
10419   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10420   effect(KILL cr);
10421 
10422   format %{ "blsmskq $dst, $src" %}
10423 
10424   ins_encode %{
10425     __ blsmskq($dst$$Register, $src$$Register);
10426   %}
10427 
10428   ins_pipe(ialu_reg);
10429 %}
10430 
10431 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
10432 %{
10433   match(Set dst (AndL (AddL src minus_1) src) ); // (src + -1) & src, emitted as a single blsrq
10434   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10435   effect(KILL cr);
10436 
10437   format %{ "blsrq  $dst, $src" %}
10438 
10439   ins_encode %{
10440     __ blsrq($dst$$Register, $src$$Register);
10441   %}
10442 
10443   ins_pipe(ialu_reg);
10444 %}
10445 
10446 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
10447 %{
10448   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) ); // ([src] + -1) & [src]; both loads name the same memory operand
10449   predicate(UseBMI1Instructions); // only selected when the BMI1 flag is enabled
10450   effect(KILL cr);
10451 
10452   ins_cost(125);
10453   format %{ "blsrq  $dst, $src" %}
10454 
10455   ins_encode %{
10456     __ blsrq($dst$$Register, $src$$Address);
10457   %}
10458 
10459   ins_pipe(ialu_reg_mem); // memory-source form: same pipe class as blsiL_rReg_mem and blsmskL_rReg_mem (was ialu_reg)
10460 %}
10461 
10462 // Or Instructions
10463 // Or Register with Register (long): dst is both an input and the result; flags declared killed.
10464 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10465 %{
10466   match(Set dst (OrL dst src));
10467   effect(KILL cr);
10468 
10469   format %{ "orq     $dst, $src\t# long" %}
10470   ins_encode %{
10471     __ orq($dst$$Register, $src$$Register);
10472   %}
10473   ins_pipe(ialu_reg_reg);
10474 %}
10475 
10476 // Use any_RegP to match R15 (TLS register) without spilling.
10477 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
10478   match(Set dst (OrL dst (CastP2X src))); // ORs a pointer register, reinterpreted as a long by CastP2X, into dst
10479   effect(KILL cr);
10480 
10481   format %{ "orq     $dst, $src\t# long" %}
10482   ins_encode %{
10483     __ orq($dst$$Register, $src$$Register); // the CastP2X emits no code of its own here; src is used directly
10484   %}
10485   ins_pipe(ialu_reg_reg);
10486 %}
10487 
10488 
10489 // Or Register with Immediate (long): the constant must be of the immL32 operand class; flags declared killed.
10490 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10491 %{
10492   match(Set dst (OrL dst src));
10493   effect(KILL cr);
10494 
10495   format %{ "orq     $dst, $src\t# long" %}
10496   ins_encode %{
10497     __ orq($dst$$Register, $src$$constant);
10498   %}
10499   ins_pipe(ialu_reg);
10500 %}
10501 
10502 // Or Register with Memory (long): the LoadL is folded into the orq memory operand; flags declared killed.
10503 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10504 %{
10505   match(Set dst (OrL dst (LoadL src)));
10506   effect(KILL cr);
10507 
10508   ins_cost(150);
10509   format %{ "orq     $dst, $src\t# long" %}
10510   ins_encode %{
10511     __ orq($dst$$Register, $src$$Address);
10512   %}
10513   ins_pipe(ialu_reg_mem);
10514 %}
10515 
10516 // Or Memory with Register (long): load, OR and store on the same memory operand, emitted as one orq.
10517 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10518 %{
10519   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10520   effect(KILL cr);
10521 
10522   ins_cost(150);
10523   format %{ "orq     $dst, $src\t# long" %}
10524   ins_encode %{
10525     __ orq($dst$$Address, $src$$Register);
10526   %}
10527   ins_pipe(ialu_mem_reg);
10528 %}
10529 
10530 // Or Memory with Immediate (long): load, OR and store on the same memory operand; the constant is of the immL32 class.
10531 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10532 %{
10533   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
10534   effect(KILL cr);
10535 
10536   ins_cost(125);
10537   format %{ "orq     $dst, $src\t# long" %}
10538   ins_encode %{
10539     __ orq($dst$$Address, $src$$constant);
10540   %}
10541   ins_pipe(ialu_mem_imm);
10542 %}
10543 
10544 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
10545 %{
10546   // con should be a pure 64-bit power of 2 immediate
10547   // because AND/OR works well enough for 8/32-bit values.
10548   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31); // only constants whose set bit is above bit 31 take this rule
10549 
10550   match(Set dst (StoreL dst (OrL (LoadL dst) con))); // sets one bit of a long in memory: OR with a power-of-2 constant
10551   effect(KILL cr);
10552 
10553   ins_cost(125);
10554   format %{ "btsq    $dst, log2($con)\t# long" %}
10555   ins_encode %{
10556     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant)); // bit index is log2 of the constant
10557   %}
10558   ins_pipe(ialu_mem_imm);
10559 %}
10560 
10561 // Xor Instructions
10562 // Xor Register with Register (long): dst is both an input and the result; flags declared killed.
10563 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10564 %{
10565   match(Set dst (XorL dst src));
10566   effect(KILL cr);
10567 
10568   format %{ "xorq    $dst, $src\t# long" %}
10569   ins_encode %{
10570     __ xorq($dst$$Register, $src$$Register);
10571   %}
10572   ins_pipe(ialu_reg_reg);
10573 %}
10574 
10575 // Xor Register with Immediate -1 (long): emitted as notq on dst; no flags operand is declared.
10576 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
10577   match(Set dst (XorL dst imm));
10578 
10579   format %{ "notq   $dst" %}
10580   ins_encode %{
10581      __ notq($dst$$Register);
10582   %}
10583   ins_pipe(ialu_reg);
10584 %}
10585 
10586 // Xor Register with Immediate (long): the constant must be of the immL32 operand class; flags declared killed.
10587 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10588 %{
10589   match(Set dst (XorL dst src));
10590   effect(KILL cr);
10591 
10592   format %{ "xorq    $dst, $src\t# long" %}
10593   ins_encode %{
10594     __ xorq($dst$$Register, $src$$constant);
10595   %}
10596   ins_pipe(ialu_reg);
10597 %}
10598 
10599 // Xor Register with Memory (long): the LoadL is folded into the xorq memory operand; flags declared killed.
10600 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10601 %{
10602   match(Set dst (XorL dst (LoadL src)));
10603   effect(KILL cr);
10604 
10605   ins_cost(150);
10606   format %{ "xorq    $dst, $src\t# long" %}
10607   ins_encode %{
10608     __ xorq($dst$$Register, $src$$Address);
10609   %}
10610   ins_pipe(ialu_reg_mem);
10611 %}
10612 
10613 // Xor Memory with Register (long): load, XOR and store on the same memory operand, emitted as one xorq.
10614 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10615 %{
10616   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10617   effect(KILL cr);
10618 
10619   ins_cost(150);
10620   format %{ "xorq    $dst, $src\t# long" %}
10621   ins_encode %{
10622     __ xorq($dst$$Address, $src$$Register);
10623   %}
10624   ins_pipe(ialu_mem_reg);
10625 %}
10626 
10627 // Xor Memory with Immediate (long): load, XOR and store on the same memory operand; the constant is of the immL32 class.
10628 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10629 %{
10630   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
10631   effect(KILL cr);
10632 
10633   ins_cost(125);
10634   format %{ "xorq    $dst, $src\t# long" %}
10635   ins_encode %{
10636     __ xorq($dst$$Address, $src$$constant);
10637   %}
10638   ins_pipe(ialu_mem_imm);
10639 %}
10640 
10641 // Convert Int to Boolean: tests src against itself, sets the low byte of dst when non-zero, then widens that byte with movzbl.
10642 instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
10643 %{
10644   match(Set dst (Conv2B src));
10645   effect(KILL cr);
10646 
10647   format %{ "testl   $src, $src\t# ci2b\n\t"
10648             "setnz   $dst\n\t"
10649             "movzbl  $dst, $dst" %}
10650   ins_encode %{
10651     __ testl($src$$Register, $src$$Register);
10652     __ set_byte_if_not_zero($dst$$Register);
10653     __ movzbl($dst$$Register, $dst$$Register); // the set-byte step writes only a byte, so the rest of dst is cleared here
10654   %}
10655   ins_pipe(pipe_slow); // XXX
10656 %}
10657 
10658 // Convert Pointer to Boolean
// Pointer flavour of Conv2B: identical to the int rule except that the source
// is an rRegP and the test is the 64-bit testq. Flags are killed.
10659 instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
10660 %{
10661   match(Set dst (Conv2B src));
10662   effect(KILL cr);
10663 
10664   format %{ "testq   $src, $src\t# cp2b\n\t"
10665             "setnz   $dst\n\t"
10666             "movzbl  $dst, $dst" %}
10667   ins_encode %{
10668     __ testq($src$$Register, $src$$Register);
10669     __ set_byte_if_not_zero($dst$$Register);
10670     __ movzbl($dst$$Register, $dst$$Register);
10671   %}
10672   ins_pipe(pipe_slow); // XXX
10673 %}
10674 
// Matches dst = CmpLTMask(p, q) on int registers. cmpl/setl/movzbl produce
// 0 or 1 in dst for the signed "less" condition; the final negl turns that
// into 0 or -1. Costed at 400; flags are killed.
10675 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
10676 %{
10677   match(Set dst (CmpLTMask p q));
10678   effect(KILL cr);
10679 
10680   ins_cost(400);
10681   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
10682             "setlt   $dst\n\t"
10683             "movzbl  $dst, $dst\n\t"
10684             "negl    $dst" %}
10685   ins_encode %{
10686     __ cmpl($p$$Register, $q$$Register);
10687     __ setl($dst$$Register);
10688     __ movzbl($dst$$Register, $dst$$Register);
10689     __ negl($dst$$Register);
10690   %}
10691   ins_pipe(pipe_slow);
10692 %}
10693 
// Special case of CmpLTMask where the right operand is the immI_0 constant:
// dst is updated in place with an arithmetic right shift by 31 (sarl), which
// replicates the sign bit. Costed at 100 versus 400 for the general rule.
10694 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
10695 %{
10696   match(Set dst (CmpLTMask dst zero));
10697   effect(KILL cr);
10698 
10699   ins_cost(100);
10700   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
10701   ins_encode %{
10702     __ sarl($dst$$Register, 31);
10703   %}
10704   ins_pipe(ialu_reg);
10705 %}
10706 
10707 /* Better to save a register than avoid a branch */
// Matches p = AddI(AndI(CmpLTMask(p, q), y), SubI(p, q)) as one rule.
// Emits subl p,q and then adds y only when the short branch on greaterEqual
// is not taken. Costed at 300; flags are killed.
10708 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10709 %{
10710   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
10711   effect(KILL cr);
10712   ins_cost(300);
10713   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
10714             "jge     done\n\t"
10715             "addl    $p,$y\n"
10716             "done:   " %}
10717   ins_encode %{
10718     Register Rp = $p$$Register;
10719     Register Rq = $q$$Register;
10720     Register Ry = $y$$Register;
10721     Label done;
10722     __ subl(Rp, Rq);
        // jccb is the short-branch form; the target is two instructions away.
10723     __ jccb(Assembler::greaterEqual, done);
10724     __ addl(Rp, Ry);
10725     __ bind(done);
10726   %}
10727   ins_pipe(pipe_cmplt);
10728 %}
10729 
10730 /* Better to save a register than avoid a branch */
// Matches y = AndI(CmpLTMask(p, q), y). Instead of materializing the mask,
// compares p with q and leaves y untouched when "less" holds; otherwise y is
// cleared with xorl. Costed at 300; flags are killed.
10731 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
10732 %{
10733   match(Set y (AndI (CmpLTMask p q) y));
10734   effect(KILL cr);
10735 
10736   ins_cost(300);
10737 
10738   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
10739             "jlt     done\n\t"
10740             "xorl    $y, $y\n"
10741             "done:   " %}
10742   ins_encode %{
10743     Register Rp = $p$$Register;
10744     Register Rq = $q$$Register;
10745     Register Ry = $y$$Register;
10746     Label done;
10747     __ cmpl(Rp, Rq);
10748     __ jccb(Assembler::less, done);
10749     __ xorl(Ry, Ry);
10750     __ bind(done);
10751   %}
10752   ins_pipe(pipe_cmplt);
10753 %}
10754 
10755 
10756 //---------- FP Instructions------------------------------------------------
10757 
10758 // Really expensive, avoid
// Matches Set cr (CmpF src1 src2) where cr is an rFlagsRegU. Emits ucomiss
// followed by emit_cmpfp_fixup (defined outside this section); the format
// string describes that fix-up as a pushfq/andq/popfq sequence run only when
// a NaN was seen. Costed at 500, against 100 for the rFlagsRegUCF variants.
10759 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
10760 %{
10761   match(Set cr (CmpF src1 src2));
10762 
10763   ins_cost(500);
10764   format %{ "ucomiss $src1, $src2\n\t"
10765             "jnp,s   exit\n\t"
10766             "pushfq\t# saw NaN, set CF\n\t"
10767             "andq    [rsp], #0xffffff2b\n\t"
10768             "popfq\n"
10769     "exit:" %}
10770   ins_encode %{
10771     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10772     emit_cmpfp_fixup(_masm);
10773   %}
10774   ins_pipe(pipe_slow);
10775 %}
10776 
// Matches the same CmpF tree as cmpF_cc_reg but produces an rFlagsRegUCF
// result: a bare ucomiss with no fix-up sequence, costed at 100.
10777 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
10778   match(Set cr (CmpF src1 src2));
10779 
10780   ins_cost(100);
10781   format %{ "ucomiss $src1, $src2" %}
10782   ins_encode %{
10783     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10784   %}
10785   ins_pipe(pipe_slow);
10786 %}
10787 
// rFlagsRegUCF float compare whose second operand is a LoadF folded into the
// ucomiss memory operand. Costed at 100.
10788 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
10789   match(Set cr (CmpF src1 (LoadF src2)));
10790 
10791   ins_cost(100);
10792   format %{ "ucomiss $src1, $src2" %}
10793   ins_encode %{
10794     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10795   %}
10796   ins_pipe(pipe_slow);
10797 %}
10798 
// rFlagsRegUCF float compare against an immF constant; the constant is
// addressed through $constantaddress (constant table). Costed at 100.
10799 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
10800   match(Set cr (CmpF src con));
10801   ins_cost(100);
10802   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
10803   ins_encode %{
10804     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10805   %}
10806   ins_pipe(pipe_slow);
10807 %}
10808 
10809 // Really expensive, avoid
// Double-precision counterpart of cmpF_cc_reg: matches Set cr (CmpD src1 src2)
// for an rFlagsRegU result, emitting ucomisd followed by emit_cmpfp_fixup
// (defined outside this section; the format string shows the sequence it is
// expected to print as). Costed at 500.
10810 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
10811 %{
10812   match(Set cr (CmpD src1 src2));
10813 
10814   ins_cost(500);
10815   format %{ "ucomisd $src1, $src2\n\t"
10816             "jnp,s   exit\n\t"
10817             "pushfq\t# saw NaN, set CF\n\t"
10818             "andq    [rsp], #0xffffff2b\n\t"
10819             "popfq\n"
10820     "exit:" %}
10821   ins_encode %{
10822     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10823     emit_cmpfp_fixup(_masm);
10824   %}
10825   ins_pipe(pipe_slow);
10826 %}
10827 
// Matches the same CmpD tree as cmpD_cc_reg but produces an rFlagsRegUCF
// result: a bare ucomisd with no fix-up sequence, costed at 100.
// The format string mirrors cmpF_cc_reg_CF; a stray trailing word was removed
// from it so the printed listing shows only the instruction and its operands.
10828 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
10829   match(Set cr (CmpD src1 src2));
10830 
10831   ins_cost(100);
10832   format %{ "ucomisd $src1, $src2" %}
10833   ins_encode %{
10834     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10835   %}
10836   ins_pipe(pipe_slow);
10837 %}
10838 
// rFlagsRegUCF double compare whose second operand is a LoadD folded into the
// ucomisd memory operand. Costed at 100.
10839 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
10840   match(Set cr (CmpD src1 (LoadD src2)));
10841 
10842   ins_cost(100);
10843   format %{ "ucomisd $src1, $src2" %}
10844   ins_encode %{
10845     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10846   %}
10847   ins_pipe(pipe_slow);
10848 %}
10849 
// rFlagsRegUCF double compare against an immD constant; the constant is
// addressed through $constantaddress (constant table). Costed at 100.
10850 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
10851   match(Set cr (CmpD src con));
10852   ins_cost(100);
10853   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
10854   ins_encode %{
10855     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10856   %}
10857   ins_pipe(pipe_slow);
10858 %}
10859 
10860 // Compare into -1,0,1
// Matches dst = CmpF3(src1, src2) with both operands in XMM registers.
// Emits ucomiss and then emit_cmpfp3 (defined outside this section), which
// fills the int register dst; the format string shows the sequence it is
// expected to print as. Costed at 275; flags are killed.
10861 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
10862 %{
10863   match(Set dst (CmpF3 src1 src2));
10864   effect(KILL cr);
10865 
10866   ins_cost(275);
10867   format %{ "ucomiss $src1, $src2\n\t"
10868             "movl    $dst, #-1\n\t"
10869             "jp,s    done\n\t"
10870             "jb,s    done\n\t"
10871             "setne   $dst\n\t"
10872             "movzbl  $dst, $dst\n"
10873     "done:" %}
10874   ins_encode %{
10875     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10876     emit_cmpfp3(_masm, $dst$$Register);
10877   %}
10878   ins_pipe(pipe_slow);
10879 %}
10880 
10881 // Compare into -1,0,1
// CmpF3 variant whose second operand is a LoadF folded into the ucomiss
// memory operand; the result is then produced by emit_cmpfp3 (defined outside
// this section). Costed at 275; flags are killed.
10882 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
10883 %{
10884   match(Set dst (CmpF3 src1 (LoadF src2)));
10885   effect(KILL cr);
10886 
10887   ins_cost(275);
10888   format %{ "ucomiss $src1, $src2\n\t"
10889             "movl    $dst, #-1\n\t"
10890             "jp,s    done\n\t"
10891             "jb,s    done\n\t"
10892             "setne   $dst\n\t"
10893             "movzbl  $dst, $dst\n"
10894     "done:" %}
10895   ins_encode %{
10896     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10897     emit_cmpfp3(_masm, $dst$$Register);
10898   %}
10899   ins_pipe(pipe_slow);
10900 %}
10901 
10902 // Compare into -1,0,1
// CmpF3 variant comparing against an immF constant addressed through
// $constantaddress; the result is then produced by emit_cmpfp3 (defined
// outside this section). Costed at 275; flags are killed.
10903 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
10904   match(Set dst (CmpF3 src con));
10905   effect(KILL cr);
10906 
10907   ins_cost(275);
10908   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
10909             "movl    $dst, #-1\n\t"
10910             "jp,s    done\n\t"
10911             "jb,s    done\n\t"
10912             "setne   $dst\n\t"
10913             "movzbl  $dst, $dst\n"
10914     "done:" %}
10915   ins_encode %{
10916     __ ucomiss($src$$XMMRegister, $constantaddress($con));
10917     emit_cmpfp3(_masm, $dst$$Register);
10918   %}
10919   ins_pipe(pipe_slow);
10920 %}
10921 
10922 // Compare into -1,0,1
// Matches dst = CmpD3(src1, src2) with both operands in XMM registers.
// Emits ucomisd and then emit_cmpfp3 (defined outside this section), which
// fills the int register dst. Costed at 275; flags are killed.
10923 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
10924 %{
10925   match(Set dst (CmpD3 src1 src2));
10926   effect(KILL cr);
10927 
10928   ins_cost(275);
10929   format %{ "ucomisd $src1, $src2\n\t"
10930             "movl    $dst, #-1\n\t"
10931             "jp,s    done\n\t"
10932             "jb,s    done\n\t"
10933             "setne   $dst\n\t"
10934             "movzbl  $dst, $dst\n"
10935     "done:" %}
10936   ins_encode %{
10937     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10938     emit_cmpfp3(_masm, $dst$$Register);
10939   %}
10940   ins_pipe(pipe_slow);
10941 %}
10942 
10943 // Compare into -1,0,1
// CmpD3 variant whose second operand is a LoadD folded into the ucomisd
// memory operand; the result is then produced by emit_cmpfp3 (defined outside
// this section). Costed at 275; flags are killed.
10944 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
10945 %{
10946   match(Set dst (CmpD3 src1 (LoadD src2)));
10947   effect(KILL cr);
10948 
10949   ins_cost(275);
10950   format %{ "ucomisd $src1, $src2\n\t"
10951             "movl    $dst, #-1\n\t"
10952             "jp,s    done\n\t"
10953             "jb,s    done\n\t"
10954             "setne   $dst\n\t"
10955             "movzbl  $dst, $dst\n"
10956     "done:" %}
10957   ins_encode %{
10958     __ ucomisd($src1$$XMMRegister, $src2$$Address);
10959     emit_cmpfp3(_masm, $dst$$Register);
10960   %}
10961   ins_pipe(pipe_slow);
10962 %}
10963 
10964 // Compare into -1,0,1
// CmpD3 variant comparing against an immD constant addressed through
// $constantaddress; the result is then produced by emit_cmpfp3 (defined
// outside this section). Costed at 275; flags are killed.
10965 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
10966   match(Set dst (CmpD3 src con));
10967   effect(KILL cr);
10968 
10969   ins_cost(275);
10970   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
10971             "movl    $dst, #-1\n\t"
10972             "jp,s    done\n\t"
10973             "jb,s    done\n\t"
10974             "setne   $dst\n\t"
10975             "movzbl  $dst, $dst\n"
10976     "done:" %}
10977   ins_encode %{
10978     __ ucomisd($src$$XMMRegister, $constantaddress($con));
10979     emit_cmpfp3(_masm, $dst$$Register);
10980   %}
10981   ins_pipe(pipe_slow);
10982 %}
10983 
10984 //----------Arithmetic Conversion Instructions---------------------------------
10985 
// Matches dst = ConvF2D(src), XMM register to XMM register, as one cvtss2sd.
// No flags operand is declared for this rule.
10986 instruct convF2D_reg_reg(regD dst, regF src)
10987 %{
10988   match(Set dst (ConvF2D src));
10989 
10990   format %{ "cvtss2sd $dst, $src" %}
10991   ins_encode %{
10992     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10993   %}
10994   ins_pipe(pipe_slow); // XXX
10995 %}
10996 
// Matches dst = ConvF2D(LoadF(src)): the float load is folded into the memory
// operand of cvtss2sd.
10997 instruct convF2D_reg_mem(regD dst, memory src)
10998 %{
10999   match(Set dst (ConvF2D (LoadF src)));
11000 
11001   format %{ "cvtss2sd $dst, $src" %}
11002   ins_encode %{
11003     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
11004   %}
11005   ins_pipe(pipe_slow); // XXX
11006 %}
11007 
// Matches dst = ConvD2F(src), XMM register to XMM register, as one cvtsd2ss.
11008 instruct convD2F_reg_reg(regF dst, regD src)
11009 %{
11010   match(Set dst (ConvD2F src));
11011 
11012   format %{ "cvtsd2ss $dst, $src" %}
11013   ins_encode %{
11014     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
11015   %}
11016   ins_pipe(pipe_slow); // XXX
11017 %}
11018 
// Matches dst = ConvD2F(LoadD(src)): the double load is folded into the
// memory operand of cvtsd2ss.
11019 instruct convD2F_reg_mem(regF dst, memory src)
11020 %{
11021   match(Set dst (ConvD2F (LoadD src)));
11022 
11023   format %{ "cvtsd2ss $dst, $src" %}
11024   ins_encode %{
11025     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
11026   %}
11027   ins_pipe(pipe_slow);
11028 %}
11029 
11030 // XXX do mem variants
// Matches dst = ConvF2I(src). Code generation is delegated entirely to the
// macro-assembler helper convert_f2i (defined outside this file section);
// the rule only declares that the flags register is killed.
11031 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
11032 %{
11033   match(Set dst (ConvF2I src));
11034   effect(KILL cr);
11035   format %{ "convert_f2i $dst,$src" %}
11036   ins_encode %{
11037     __ convert_f2i($dst$$Register, $src$$XMMRegister);
11038   %}
11039   ins_pipe(pipe_slow);
11040 %}
11041 
// Matches dst = ConvF2L(src) into a long register. Code generation is
// delegated to the macro-assembler helper convert_f2l (defined outside this
// file section); the flags register is declared killed.
11042 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
11043 %{
11044   match(Set dst (ConvF2L src));
11045   effect(KILL cr);
11046   format %{ "convert_f2l $dst,$src"%}
11047   ins_encode %{
11048     __ convert_f2l($dst$$Register, $src$$XMMRegister);
11049   %}
11050   ins_pipe(pipe_slow);
11051 %}
11052 
// Matches dst = ConvD2I(src) into an int register. Code generation is
// delegated to the macro-assembler helper convert_d2i (defined outside this
// file section); the flags register is declared killed.
11053 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
11054 %{
11055   match(Set dst (ConvD2I src));
11056   effect(KILL cr);
11057   format %{ "convert_d2i $dst,$src"%}
11058   ins_encode %{
11059     __ convert_d2i($dst$$Register, $src$$XMMRegister);
11060   %}
11061   ins_pipe(pipe_slow);
11062 %}
11063 
// Matches dst = ConvD2L(src) into a long register. Code generation is
// delegated to the macro-assembler helper convert_d2l (defined outside this
// file section); the flags register is declared killed.
11064 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
11065 %{
11066   match(Set dst (ConvD2L src));
11067   effect(KILL cr);
11068   format %{ "convert_d2l $dst,$src"%}
11069   ins_encode %{
11070     __ convert_d2l($dst$$Register, $src$$XMMRegister);
11071   %}
11072   ins_pipe(pipe_slow);
11073 %}
11074 
// Matches dst = RoundD(src) into a long register. dst, rtmp and the
// rcx_RegL operand are all declared TEMP and the flags are killed; the actual
// sequence comes from the macro-assembler helper round_double (defined
// outside this file section), which receives both scratch registers.
11075 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
11076 %{
11077   match(Set dst (RoundD src));
11078   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
11079   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
11080   ins_encode %{
11081     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
11082   %}
11083   ins_pipe(pipe_slow);
11084 %}
11085 
// Matches dst = RoundF(src) into an int register. Same operand shape as
// round_double_reg: dst, rtmp and the rcx_RegL operand are TEMP, flags are
// killed, and the sequence comes from the macro-assembler helper round_float
// (defined outside this file section). Unlike the double rule, the format
// string here does not print the temporaries.
11086 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
11087 %{
11088   match(Set dst (RoundF src));
11089   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
11090   format %{ "round_float $dst,$src" %}
11091   ins_encode %{
11092     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
11093   %}
11094   ins_pipe(pipe_slow);
11095 %}
11096 
// Matches dst = ConvI2F(src) from a general register via cvtsi2ssl.
// Only selectable when UseXmmI2F is off; convXI2F_reg covers the other case.
11097 instruct convI2F_reg_reg(regF dst, rRegI src)
11098 %{
11099   predicate(!UseXmmI2F);
11100   match(Set dst (ConvI2F src));
11101 
11102   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11103   ins_encode %{
11104     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11105   %}
11106   ins_pipe(pipe_slow); // XXX
11107 %}
11108 
// Matches dst = ConvI2F(LoadI(src)): the int load is folded into the memory
// operand of cvtsi2ssl. This rule carries no UseXmmI2F predicate.
11109 instruct convI2F_reg_mem(regF dst, memory src)
11110 %{
11111   match(Set dst (ConvI2F (LoadI src)));
11112 
11113   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
11114   ins_encode %{
11115     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
11116   %}
11117   ins_pipe(pipe_slow); // XXX
11118 %}
11119 
// Matches dst = ConvI2D(src) from a general register via cvtsi2sdl.
// Only selectable when UseXmmI2D is off; convXI2D_reg covers the other case.
11120 instruct convI2D_reg_reg(regD dst, rRegI src)
11121 %{
11122   predicate(!UseXmmI2D);
11123   match(Set dst (ConvI2D src));
11124 
11125   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11126   ins_encode %{
11127     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11128   %}
11129   ins_pipe(pipe_slow); // XXX
11130 %}
11131 
// Matches dst = ConvI2D(LoadI(src)): the int load is folded into the memory
// operand of cvtsi2sdl. This rule carries no UseXmmI2D predicate.
11132 instruct convI2D_reg_mem(regD dst, memory src)
11133 %{
11134   match(Set dst (ConvI2D (LoadI src)));
11135 
11136   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
11137   ins_encode %{
11138     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
11139   %}
11140   ins_pipe(pipe_slow); // XXX
11141 %}
11142 
// Alternative ConvI2F rule, selectable only when UseXmmI2F is on: movdl first
// copies the int into the destination XMM register, then cvtdq2ps converts it
// in place (dst is both source and destination of the second instruction).
11143 instruct convXI2F_reg(regF dst, rRegI src)
11144 %{
11145   predicate(UseXmmI2F);
11146   match(Set dst (ConvI2F src));
11147 
11148   format %{ "movdl $dst, $src\n\t"
11149             "cvtdq2psl $dst, $dst\t# i2f" %}
11150   ins_encode %{
11151     __ movdl($dst$$XMMRegister, $src$$Register);
11152     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11153   %}
11154   ins_pipe(pipe_slow); // XXX
11155 %}
11156 
// Alternative ConvI2D rule, selectable only when UseXmmI2D is on: movdl first
// copies the int into the destination XMM register, then cvtdq2pd converts it
// in place (dst is both source and destination of the second instruction).
11157 instruct convXI2D_reg(regD dst, rRegI src)
11158 %{
11159   predicate(UseXmmI2D);
11160   match(Set dst (ConvI2D src));
11161 
11162   format %{ "movdl $dst, $src\n\t"
11163             "cvtdq2pdl $dst, $dst\t# i2d" %}
11164   ins_encode %{
11165     __ movdl($dst$$XMMRegister, $src$$Register);
11166     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11167   %}
11168   ins_pipe(pipe_slow); // XXX
11169 %}
11170 
// Matches dst = ConvL2F(src) from a long general register via cvtsi2ssq.
11171 instruct convL2F_reg_reg(regF dst, rRegL src)
11172 %{
11173   match(Set dst (ConvL2F src));
11174 
11175   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11176   ins_encode %{
11177     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
11178   %}
11179   ins_pipe(pipe_slow); // XXX
11180 %}
11181 
// Matches dst = ConvL2F(LoadL(src)): the long load is folded into the memory
// operand of cvtsi2ssq.
11182 instruct convL2F_reg_mem(regF dst, memory src)
11183 %{
11184   match(Set dst (ConvL2F (LoadL src)));
11185 
11186   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
11187   ins_encode %{
11188     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
11189   %}
11190   ins_pipe(pipe_slow); // XXX
11191 %}
11192 
// Matches dst = ConvL2D(src) from a long general register via cvtsi2sdq.
11193 instruct convL2D_reg_reg(regD dst, rRegL src)
11194 %{
11195   match(Set dst (ConvL2D src));
11196 
11197   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11198   ins_encode %{
11199     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
11200   %}
11201   ins_pipe(pipe_slow); // XXX
11202 %}
11203 
// Matches dst = ConvL2D(LoadL(src)): the long load is folded into the memory
// operand of cvtsi2sdq.
11204 instruct convL2D_reg_mem(regD dst, memory src)
11205 %{
11206   match(Set dst (ConvL2D (LoadL src)));
11207 
11208   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
11209   ins_encode %{
11210     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
11211   %}
11212   ins_pipe(pipe_slow); // XXX
11213 %}
11214 
// Matches dst = ConvI2L(src) as a sign-extending movslq. Costed at 125; the
// zero-extending forms further down match the AndL-with-mask pattern instead.
11215 instruct convI2L_reg_reg(rRegL dst, rRegI src)
11216 %{
11217   match(Set dst (ConvI2L src));
11218 
11219   ins_cost(125);
11220   format %{ "movslq  $dst, $src\t# i2l" %}
11221   ins_encode %{
11222     __ movslq($dst$$Register, $src$$Register);
11223   %}
11224   ins_pipe(ialu_reg_reg);
11225 %}
11226 
11227 // instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
11228 // %{
11229 //   match(Set dst (ConvI2L src));
11230 // //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
11231 // //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
11232 //   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
11233 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
11234 //             ((const TypeNode*) n)->type()->is_long()->_lo ==
11235 //             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);
11236 
11237 //   format %{ "movl    $dst, $src\t# unsigned i2l" %}
11238 //   ins_encode(enc_copy(dst, src));
11239 // //   opcode(0x63); // needs REX.W
11240 // //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
11241 //   ins_pipe(ialu_reg_reg);
11242 // %}
11243 
11244 // Zero-extend convert int to long
// Matches dst = AndL(ConvI2L(src), mask) where mask is an immL_32bits operand,
// replacing the sign-extend + mask pair with a single 32-bit movl.
// The single-line format no longer ends in a dangling "\n\t", matching the
// other one-instruction rules in this section.
11245 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
11246 %{
11247   match(Set dst (AndL (ConvI2L src) mask));
11248 
11249   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
11250   ins_encode %{
        // Nothing is emitted when the allocator gives dst and src the same
        // register. NOTE(review): this relies on the upper 32 bits of src
        // already being zero in that case -- confirm.
11251     if ($dst$$reg != $src$$reg) {
11252       __ movl($dst$$Register, $src$$Register);
11253     }
11254   %}
11255   ins_pipe(ialu_reg_reg);
11256 %}
11257 
11258 // Zero-extend convert int to long
// Matches dst = AndL(ConvI2L(LoadI(src)), mask) where mask is an immL_32bits
// operand: load, conversion and mask collapse into one 32-bit movl from
// memory. The single-line format no longer ends in a dangling "\n\t",
// matching the other one-instruction rules in this section.
11259 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
11260 %{
11261   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
11262 
11263   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
11264   ins_encode %{
11265     __ movl($dst$$Register, $src$$Address);
11266   %}
11267   ins_pipe(ialu_reg_mem);
11268 %}
11269 
// Matches dst = AndL(src, mask) where mask is an immL_32bits operand and
// implements the mask as a 32-bit register move (movl), which the format
// string labels as a zero-extension of the long. No flags operand is declared.
11270 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
11271 %{
11272   match(Set dst (AndL src mask));
11273 
11274   format %{ "movl    $dst, $src\t# zero-extend long" %}
11275   ins_encode %{
11276     __ movl($dst$$Register, $src$$Register);
11277   %}
11278   ins_pipe(ialu_reg_reg);
11279 %}
11280 
// Matches dst = ConvL2I(src): the narrowing is a plain 32-bit movl from the
// long register into the int register.
11281 instruct convL2I_reg_reg(rRegI dst, rRegL src)
11282 %{
11283   match(Set dst (ConvL2I src));
11284 
11285   format %{ "movl    $dst, $src\t# l2i" %}
11286   ins_encode %{
11287     __ movl($dst$$Register, $src$$Register);
11288   %}
11289   ins_pipe(ialu_reg_reg);
11290 %}
11291 
11292 
// MoveF2I from a float stack slot into an int register: a 32-bit movl load
// from rsp + the slot's displacement. Costed at 125.
11293 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11294   match(Set dst (MoveF2I src));
11295   effect(DEF dst, USE src);
11296 
11297   ins_cost(125);
11298   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
11299   ins_encode %{
11300     __ movl($dst$$Register, Address(rsp, $src$$disp));
11301   %}
11302   ins_pipe(ialu_reg_mem);
11303 %}
11304 
// MoveI2F from an int stack slot into an XMM register: movflt loads from
// rsp + the slot's displacement (printed as movss). Costed at 125.
11305 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
11306   match(Set dst (MoveI2F src));
11307   effect(DEF dst, USE src);
11308 
11309   ins_cost(125);
11310   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
11311   ins_encode %{
11312     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11313   %}
11314   ins_pipe(pipe_slow);
11315 %}
11316 
// MoveD2L from a double stack slot into a long register: a 64-bit movq load
// from rsp + the slot's displacement. Costed at 125.
11317 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
11318   match(Set dst (MoveD2L src));
11319   effect(DEF dst, USE src);
11320 
11321   ins_cost(125);
11322   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
11323   ins_encode %{
11324     __ movq($dst$$Register, Address(rsp, $src$$disp));
11325   %}
11326   ins_pipe(ialu_reg_mem);
11327 %}
11328 
// MoveL2D from a long stack slot into an XMM register, selectable only when
// UseXmmLoadAndClearUpper is off. The encoding is the same movdbl call as in
// MoveL2D_stack_reg; only the predicate and the printed mnemonic (movlpd)
// differ. NOTE(review): presumably movdbl itself picks the instruction from
// the same flag -- confirm against the macro assembler.
11329 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
11330   predicate(!UseXmmLoadAndClearUpper);
11331   match(Set dst (MoveL2D src));
11332   effect(DEF dst, USE src);
11333 
11334   ins_cost(125);
11335   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
11336   ins_encode %{
11337     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11338   %}
11339   ins_pipe(pipe_slow);
11340 %}
11341 
// MoveL2D from a long stack slot into an XMM register, selectable only when
// UseXmmLoadAndClearUpper is on. Loads via movdbl from rsp + the slot's
// displacement (printed as movsd). Costed at 125.
11342 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
11343   predicate(UseXmmLoadAndClearUpper);
11344   match(Set dst (MoveL2D src));
11345   effect(DEF dst, USE src);
11346 
11347   ins_cost(125);
11348   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
11349   ins_encode %{
11350     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11351   %}
11352   ins_pipe(pipe_slow);
11353 %}
11354 
11355 
// MoveF2I from an XMM register into an int stack slot: movflt stores to
// rsp + the slot's displacement (printed as movss). Costed at 95.
11356 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11357   match(Set dst (MoveF2I src));
11358   effect(DEF dst, USE src);
11359 
11360   ins_cost(95); // XXX
11361   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
11362   ins_encode %{
11363     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11364   %}
11365   ins_pipe(pipe_slow);
11366 %}
11367 
// MoveI2F from an int register into a float stack slot: a 32-bit movl store
// to rsp + the slot's displacement. Costed at 100.
11368 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11369   match(Set dst (MoveI2F src));
11370   effect(DEF dst, USE src);
11371 
11372   ins_cost(100);
11373   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
11374   ins_encode %{
11375     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11376   %}
11377   ins_pipe( ialu_mem_reg );
11378 %}
11379 
// MoveD2L from an XMM register into a long stack slot: movdbl stores to
// rsp + the slot's displacement (printed as movsd). Costed at 95.
// The format tag now names this rule; it previously carried the name of the
// sibling MoveL2D_reg_stack rule, which made printed listings ambiguous.
11380 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
11381   match(Set dst (MoveD2L src));
11382   effect(DEF dst, USE src);
11383 
11384   ins_cost(95); // XXX
11385   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
11386   ins_encode %{
11387     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11388   %}
11389   ins_pipe(pipe_slow);
11390 %}
11391 
// MoveL2D from a long register into a double stack slot: a 64-bit movq store
// to rsp + the slot's displacement. Costed at 100.
11392 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
11393   match(Set dst (MoveL2D src));
11394   effect(DEF dst, USE src);
11395 
11396   ins_cost(100);
11397   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
11398   ins_encode %{
11399     __ movq(Address(rsp, $dst$$disp), $src$$Register);
11400   %}
11401   ins_pipe(ialu_mem_reg);
11402 %}
11403 
// MoveF2I directly between registers: movdl from the XMM register into the
// int register. Costed at 85, below the stack-slot variants of this move.
11404 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
11405   match(Set dst (MoveF2I src));
11406   effect(DEF dst, USE src);
11407   ins_cost(85);
11408   format %{ "movd    $dst,$src\t# MoveF2I" %}
11409   ins_encode %{
11410     __ movdl($dst$$Register, $src$$XMMRegister);
11411   %}
11412   ins_pipe( pipe_slow );
11413 %}
11414 
// MoveD2L directly between registers: movdq from the XMM register into the
// long register (the format string prints it as "movd"). Costed at 85.
11415 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
11416   match(Set dst (MoveD2L src));
11417   effect(DEF dst, USE src);
11418   ins_cost(85);
11419   format %{ "movd    $dst,$src\t# MoveD2L" %}
11420   ins_encode %{
11421     __ movdq($dst$$Register, $src$$XMMRegister);
11422   %}
11423   ins_pipe( pipe_slow );
11424 %}
11425 
// MoveI2F directly between registers: movdl from the int register into the
// XMM register. Costed at 100.
11426 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
11427   match(Set dst (MoveI2F src));
11428   effect(DEF dst, USE src);
11429   ins_cost(100);
11430   format %{ "movd    $dst,$src\t# MoveI2F" %}
11431   ins_encode %{
11432     __ movdl($dst$$XMMRegister, $src$$Register);
11433   %}
11434   ins_pipe( pipe_slow );
11435 %}
11436 
// MoveL2D directly between registers: movdq from the long register into the
// XMM register (the format string prints it as "movd"). Costed at 100.
11437 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
11438   match(Set dst (MoveL2D src));
11439   effect(DEF dst, USE src);
11440   ins_cost(100);
11441   format %{ "movd    $dst,$src\t# MoveL2D" %}
11442   ins_encode %{
11443      __ movdq($dst$$XMMRegister, $src$$Register);
11444   %}
11445   ins_pipe( pipe_slow );
11446 %}
11447 
11448 
11449 // Fast clearing of an array
11450 // Small ClearArray non-AVX512.
// Selected when the ClearArrayNode is neither is_large() nor word_copy_only()
// and UseAVX <= 2. The count, base and fill value use the rcx_RegL, rdi_RegP
// and rax_RegL operand classes and are USE_KILLed; tmp is a TEMP XMM register
// and the flags are killed. All code generation is delegated to clear_mem();
// the format template below is only the textual description printed for this
// node and branches on the same UseFastStosb / UseXMMForObjInit flags.
11451 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11452                   Universe dummy, rFlagsReg cr)
11453 %{
11454   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11455   match(Set dummy (ClearArray (Binary cnt base) val));
11456   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11457 
11458   format %{ $$template
11459     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11460     $$emit$$"jg      LARGE\n\t"
11461     $$emit$$"dec     rcx\n\t"
11462     $$emit$$"js      DONE\t# Zero length\n\t"
11463     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11464     $$emit$$"dec     rcx\n\t"
11465     $$emit$$"jge     LOOP\n\t"
11466     $$emit$$"jmp     DONE\n\t"
11467     $$emit$$"# LARGE:\n\t"
11468     if (UseFastStosb) {
11469        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11470        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11471     } else if (UseXMMForObjInit) {
11472        $$emit$$"movdq   $tmp, $val\n\t"
11473        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11474        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11475        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11476        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11477        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11478        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11479        $$emit$$"add     0x40,rax\n\t"
11480        $$emit$$"# L_zero_64_bytes:\n\t"
11481        $$emit$$"sub     0x8,rcx\n\t"
11482        $$emit$$"jge     L_loop\n\t"
11483        $$emit$$"add     0x4,rcx\n\t"
11484        $$emit$$"jl      L_tail\n\t"
11485        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11486        $$emit$$"add     0x20,rax\n\t"
11487        $$emit$$"sub     0x4,rcx\n\t"
11488        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11489        $$emit$$"add     0x4,rcx\n\t"
11490        $$emit$$"jle     L_end\n\t"
11491        $$emit$$"dec     rcx\n\t"
11492        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11493        $$emit$$"vmovq   xmm0,(rax)\n\t"
11494        $$emit$$"add     0x8,rax\n\t"
11495        $$emit$$"dec     rcx\n\t"
11496        $$emit$$"jge     L_sloop\n\t"
11497        $$emit$$"# L_end:\n\t"
11498     } else {
11499        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11500     }
11501     $$emit$$"# DONE"
11502   %}
11503   ins_encode %{
        // The two trailing booleans are (false, false) here; the sibling
        // word_copy_only rule passes (false, true). Their exact meaning is
        // defined by clear_mem, which is not visible in this section.
11504     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11505                  $tmp$$XMMRegister, false, false);
11506   %}
11507   ins_pipe(pipe_slow);
11508 %}
11509 
// Small ClearArray, non-AVX512, for nodes flagged word_copy_only().
// Selected when the ClearArrayNode is not is_large(), is word_copy_only(),
// and UseAVX <= 2. Operands and effects are the same as rep_stos; the
// differences are the last clear_mem argument (true instead of false) and a
// format template without the UseFastStosb branch.
11510 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11511                             Universe dummy, rFlagsReg cr)
11512 %{
11513   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11514   match(Set dummy (ClearArray (Binary cnt base) val));
11515   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11516 
11517   format %{ $$template
11518     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11519     $$emit$$"jg      LARGE\n\t"
11520     $$emit$$"dec     rcx\n\t"
11521     $$emit$$"js      DONE\t# Zero length\n\t"
11522     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11523     $$emit$$"dec     rcx\n\t"
11524     $$emit$$"jge     LOOP\n\t"
11525     $$emit$$"jmp     DONE\n\t"
11526     $$emit$$"# LARGE:\n\t"
11527     if (UseXMMForObjInit) {
11528        $$emit$$"movdq   $tmp, $val\n\t"
11529        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11530        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11531        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11532        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11533        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11534        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11535        $$emit$$"add     0x40,rax\n\t"
11536        $$emit$$"# L_zero_64_bytes:\n\t"
11537        $$emit$$"sub     0x8,rcx\n\t"
11538        $$emit$$"jge     L_loop\n\t"
11539        $$emit$$"add     0x4,rcx\n\t"
11540        $$emit$$"jl      L_tail\n\t"
11541        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11542        $$emit$$"add     0x20,rax\n\t"
11543        $$emit$$"sub     0x4,rcx\n\t"
11544        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11545        $$emit$$"add     0x4,rcx\n\t"
11546        $$emit$$"jle     L_end\n\t"
11547        $$emit$$"dec     rcx\n\t"
11548        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11549        $$emit$$"vmovq   xmm0,(rax)\n\t"
11550        $$emit$$"add     0x8,rax\n\t"
11551        $$emit$$"dec     rcx\n\t"
11552        $$emit$$"jge     L_sloop\n\t"
11553        $$emit$$"# L_end:\n\t"
11554     } else {
11555        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11556     }
11557     $$emit$$"# DONE"
11558   %}
11559   ins_encode %{
        // Same call as rep_stos except that the last argument is true.
11560     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11561                  $tmp$$XMMRegister, false, true);
11562   %}
11563   ins_pipe(pipe_slow);
11564 %}
11565 
11566 // Small ClearArray AVX512 non-constant length.
// Selected when the ClearArrayNode is neither is_large() nor word_copy_only()
// and UseAVX > 2. Compared with rep_stos it uses a legRegD temporary, adds a
// kReg (opmask) temporary that is forwarded to clear_mem, and carries an
// explicit ins_cost(125).
// NOTE(review): the format template still starts with "xorq rax, rax" and
// prints ymm0/xmm0 rather than $tmp/$val, while val is an input bound to the
// rax_RegL operand class and passed to clear_mem. The printed text looks
// stale relative to the encoding -- confirm against clear_mem before relying
// on it.
11567 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11568                        Universe dummy, rFlagsReg cr)
11569 %{
11570   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11571   match(Set dummy (ClearArray (Binary cnt base) val));
11572   ins_cost(125);
11573   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11574 
11575   format %{ $$template
11576     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11577     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11578     $$emit$$"jg      LARGE\n\t"
11579     $$emit$$"dec     rcx\n\t"
11580     $$emit$$"js      DONE\t# Zero length\n\t"
11581     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11582     $$emit$$"dec     rcx\n\t"
11583     $$emit$$"jge     LOOP\n\t"
11584     $$emit$$"jmp     DONE\n\t"
11585     $$emit$$"# LARGE:\n\t"
11586     if (UseFastStosb) {
11587        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11588        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
11589     } else if (UseXMMForObjInit) {
11590        $$emit$$"mov     rdi,rax\n\t"
11591        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11592        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11593        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11594        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11595        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11596        $$emit$$"add     0x40,rax\n\t"
11597        $$emit$$"# L_zero_64_bytes:\n\t"
11598        $$emit$$"sub     0x8,rcx\n\t"
11599        $$emit$$"jge     L_loop\n\t"
11600        $$emit$$"add     0x4,rcx\n\t"
11601        $$emit$$"jl      L_tail\n\t"
11602        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11603        $$emit$$"add     0x20,rax\n\t"
11604        $$emit$$"sub     0x4,rcx\n\t"
11605        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11606        $$emit$$"add     0x4,rcx\n\t"
11607        $$emit$$"jle     L_end\n\t"
11608        $$emit$$"dec     rcx\n\t"
11609        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11610        $$emit$$"vmovq   xmm0,(rax)\n\t"
11611        $$emit$$"add     0x8,rax\n\t"
11612        $$emit$$"dec     rcx\n\t"
11613        $$emit$$"jge     L_sloop\n\t"
11614        $$emit$$"# L_end:\n\t"
11615     } else {
11616        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11617     }
11618     $$emit$$"# DONE"
11619   %}
11620   ins_encode %{
        // Same (false, false) flags as rep_stos, plus the opmask temporary.
11621     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11622                  $tmp$$XMMRegister, false, false, $ktmp$$KRegister);
11623   %}
11624   ins_pipe(pipe_slow);
11625 %}
11626 
// ClearArray with a non-constant length, word-copy-only, AVX-512 variant.
// Selected when the ClearArray node is not is_large(), is word_copy_only(),
// and UseAVX > 2. cnt, base and val use the rcx_RegL / rdi_RegP / rax_RegL
// operand classes and are consumed (USE_KILL); tmp (legRegD) and ktmp (kReg)
// are scratch registers and the flags are killed.
// The format block is only a printed listing; the code itself is emitted by
// clear_mem(), called with the boolean pair (false, true).
// NOTE(review): the pair presumably means (is_large, word_copy_only), mirroring
// the predicate - confirm against the clear_mem declaration.
// The listing has no "rep stosb" arm, in line with rep_stos_large_word_copy.
// NOTE(review): presumably a word-copy-only clear never takes the byte-wise
// stosb path - confirm against MacroAssembler::clear_mem.
11627 instruct rep_stos_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11628                                  Universe dummy, rFlagsReg cr)
11629 %{
11630   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11631   match(Set dummy (ClearArray (Binary cnt base) val));
11632   ins_cost(125);
11633   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11634
11635   format %{ $$template
11636     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11637     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
11638     $$emit$$"jg      LARGE\n\t"
11639     $$emit$$"dec     rcx\n\t"
11640     $$emit$$"js      DONE\t# Zero length\n\t"
11641     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
11642     $$emit$$"dec     rcx\n\t"
11643     $$emit$$"jge     LOOP\n\t"
11644     $$emit$$"jmp     DONE\n\t"
11645     $$emit$$"# LARGE:\n\t"
11649     if (UseXMMForObjInit) {
11650        $$emit$$"mov     rdi,rax\n\t"
11651        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11652        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11653        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11654        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11655        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11656        $$emit$$"add     0x40,rax\n\t"
11657        $$emit$$"# L_zero_64_bytes:\n\t"
11658        $$emit$$"sub     0x8,rcx\n\t"
11659        $$emit$$"jge     L_loop\n\t"
11660        $$emit$$"add     0x4,rcx\n\t"
11661        $$emit$$"jl      L_tail\n\t"
11662        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11663        $$emit$$"add     0x20,rax\n\t"
11664        $$emit$$"sub     0x4,rcx\n\t"
11665        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11666        $$emit$$"add     0x4,rcx\n\t"
11667        $$emit$$"jle     L_end\n\t"
11668        $$emit$$"dec     rcx\n\t"
11669        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11670        $$emit$$"vmovq   xmm0,(rax)\n\t"
11671        $$emit$$"add     0x8,rax\n\t"
11672        $$emit$$"dec     rcx\n\t"
11673        $$emit$$"jge     L_sloop\n\t"
11674        $$emit$$"# L_end:\n\t"
11675     } else {
11676        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
11677     }
11678     $$emit$$"# DONE"
11679   %}
11680   ins_encode %{
11681     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11682                  $tmp$$XMMRegister, false, true, $ktmp$$KRegister);
11683   %}
11684   ins_pipe(pipe_slow);
11685 %}
11686 
11687 // Large ClearArray non-AVX512.
// Selected when the ClearArray node is is_large(), is not word_copy_only(),
// and UseAVX <= 2. cnt, base and val use the rcx_RegL / rdi_RegP / rax_RegL
// operand classes and are consumed (USE_KILL); tmp (regD) is a scratch XMM
// register chosen by the register allocator, so the listing prints it as $tmp
// on every line instead of naming a fixed register. Flags are killed.
// The format block is only a printed listing; the code itself is emitted by
// clear_mem(), called with the boolean pair (true, false) and no opmask register.
// NOTE(review): the pair presumably means (is_large, word_copy_only), mirroring
// the predicate - confirm against the clear_mem declaration.
11688 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11689                         Universe dummy, rFlagsReg cr)
11690 %{
11691   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11692   match(Set dummy (ClearArray (Binary cnt base) val));
11693   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11694
11695   format %{ $$template
11696     if (UseFastStosb) {
11697        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11698        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11699     } else if (UseXMMForObjInit) {
11700        $$emit$$"movdq   $tmp, $val\n\t"
11701        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11702        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11703        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11704        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11705        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11706        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11707        $$emit$$"add     0x40,rax\n\t"
11708        $$emit$$"# L_zero_64_bytes:\n\t"
11709        $$emit$$"sub     0x8,rcx\n\t"
11710        $$emit$$"jge     L_loop\n\t"
11711        $$emit$$"add     0x4,rcx\n\t"
11712        $$emit$$"jl      L_tail\n\t"
11713        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11714        $$emit$$"add     0x20,rax\n\t"
11715        $$emit$$"sub     0x4,rcx\n\t"
11716        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11717        $$emit$$"add     0x4,rcx\n\t"
11718        $$emit$$"jle     L_end\n\t"
11719        $$emit$$"dec     rcx\n\t"
11720        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11721        $$emit$$"vmovq   $tmp,(rax)\n\t"
11722        $$emit$$"add     0x8,rax\n\t"
11723        $$emit$$"dec     rcx\n\t"
11724        $$emit$$"jge     L_sloop\n\t"
11725        $$emit$$"# L_end:\n\t"
11726     } else {
11727        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11728     }
11729   %}
11730   ins_encode %{
11731     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11732                  $tmp$$XMMRegister, true, false);
11733   %}
11734   ins_pipe(pipe_slow);
11735 %}
11736 
// Large ClearArray non-AVX512, word-copy-only variant.
// Selected when the ClearArray node is is_large(), is word_copy_only(), and
// UseAVX <= 2. cnt, base and val use the rcx_RegL / rdi_RegP / rax_RegL
// operand classes and are consumed (USE_KILL); tmp (regD) is a scratch XMM
// register chosen by the register allocator, so the listing prints it as $tmp
// on every line instead of naming a fixed register. Flags are killed.
// The listing has no "rep stosb" arm: only the XMM loop or "rep stosq" is shown.
// The format block is only a printed listing; the code itself is emitted by
// clear_mem(), called with the boolean pair (true, true) and no opmask register.
// NOTE(review): the pair presumably means (is_large, word_copy_only), mirroring
// the predicate - confirm against the clear_mem declaration.
11737 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11738                                   Universe dummy, rFlagsReg cr)
11739 %{
11740   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX <= 2));
11741   match(Set dummy (ClearArray (Binary cnt base) val));
11742   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, USE_KILL val, KILL cr);
11743
11744   format %{ $$template
11745     if (UseXMMForObjInit) {
11746        $$emit$$"movdq   $tmp, $val\n\t"
11747        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11748        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11749        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11750        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11751        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11752        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11753        $$emit$$"add     0x40,rax\n\t"
11754        $$emit$$"# L_zero_64_bytes:\n\t"
11755        $$emit$$"sub     0x8,rcx\n\t"
11756        $$emit$$"jge     L_loop\n\t"
11757        $$emit$$"add     0x4,rcx\n\t"
11758        $$emit$$"jl      L_tail\n\t"
11759        $$emit$$"vmovdqu $tmp,(rax)\n\t"
11760        $$emit$$"add     0x20,rax\n\t"
11761        $$emit$$"sub     0x4,rcx\n\t"
11762        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11763        $$emit$$"add     0x4,rcx\n\t"
11764        $$emit$$"jle     L_end\n\t"
11765        $$emit$$"dec     rcx\n\t"
11766        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11767        $$emit$$"vmovq   $tmp,(rax)\n\t"
11768        $$emit$$"add     0x8,rax\n\t"
11769        $$emit$$"dec     rcx\n\t"
11770        $$emit$$"jge     L_sloop\n\t"
11771        $$emit$$"# L_end:\n\t"
11772     } else {
11773        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11774     }
11775   %}
11776   ins_encode %{
11777     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11778                  $tmp$$XMMRegister, true, true);
11779   %}
11780   ins_pipe(pipe_slow);
11781 %}
11782 
11783 // Large ClearArray AVX512.
// Selected when the ClearArray node is is_large(), is not word_copy_only(),
// and UseAVX > 2. cnt, base and val use the rcx_RegL / rdi_RegP / rax_RegL
// operand classes and are consumed (USE_KILL); tmp (legRegD) and ktmp (kReg)
// are scratch registers and the flags are killed.
// The format block is only a printed listing whose shape depends on
// UseFastStosb / UseXMMForObjInit; the code itself is emitted by clear_mem(),
// called with the boolean pair (true, false) plus the opmask register.
// NOTE(review): the listing names ymm0/xmm0 and "xorq rax, rax" literally,
// while the rule takes tmp and val as operands - the listing may not mirror
// the emitted code exactly; confirm against MacroAssembler::clear_mem.
11784 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11785                              Universe dummy, rFlagsReg cr)
11786 %{
11787   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11788   match(Set dummy (ClearArray (Binary cnt base) val));
11789   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11790
11791   format %{ $$template
11792     if (UseFastStosb) {
11793        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11794        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
11795        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
11796     } else if (UseXMMForObjInit) {
11797        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11798        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11799        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11800        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11801        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11802        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11803        $$emit$$"add     0x40,rax\n\t"
11804        $$emit$$"# L_zero_64_bytes:\n\t"
11805        $$emit$$"sub     0x8,rcx\n\t"
11806        $$emit$$"jge     L_loop\n\t"
11807        $$emit$$"add     0x4,rcx\n\t"
11808        $$emit$$"jl      L_tail\n\t"
11809        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11810        $$emit$$"add     0x20,rax\n\t"
11811        $$emit$$"sub     0x4,rcx\n\t"
11812        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11813        $$emit$$"add     0x4,rcx\n\t"
11814        $$emit$$"jle     L_end\n\t"
11815        $$emit$$"dec     rcx\n\t"
11816        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11817        $$emit$$"vmovq   xmm0,(rax)\n\t"
11818        $$emit$$"add     0x8,rax\n\t"
11819        $$emit$$"dec     rcx\n\t"
11820        $$emit$$"jge     L_sloop\n\t"
11821        $$emit$$"# L_end:\n\t"
11822     } else {
11823        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11824        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11825     }
11826   %}
11827   ins_encode %{
11828     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11829                  $tmp$$XMMRegister, true, false, $ktmp$$KRegister);
11830   %}
11831   ins_pipe(pipe_slow);
11832 %}
11833 
// Large ClearArray AVX512, word-copy-only variant.
// Selected when the ClearArray node is is_large(), is word_copy_only(), and
// UseAVX > 2. cnt, base and val use the rcx_RegL / rdi_RegP / rax_RegL
// operand classes and are consumed (USE_KILL); tmp (legRegD) and ktmp (kReg)
// are scratch registers and the flags are killed.
// The format block is only a printed listing; the code itself is emitted by
// clear_mem(), called with the boolean pair (true, true) plus the opmask register.
// NOTE(review): the pair presumably means (is_large, word_copy_only), mirroring
// the predicate - confirm against the clear_mem declaration.
// The listing has no "rep stosb" arm, in line with rep_stos_large_word_copy.
// NOTE(review): presumably a word-copy-only clear never takes the byte-wise
// stosb path - confirm against MacroAssembler::clear_mem.
11834 instruct rep_stos_large_evex_word_copy(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegL val,
11835                                        Universe dummy, rFlagsReg cr)
11836 %{
11837   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only() && (UseAVX > 2));
11838   match(Set dummy (ClearArray (Binary cnt base) val));
11839   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, USE_KILL val, KILL cr);
11840
11841   format %{ $$template
11846     if (UseXMMForObjInit) {
11847        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
11848        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
11849        $$emit$$"jmpq    L_zero_64_bytes\n\t"
11850        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11851        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11852        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
11853        $$emit$$"add     0x40,rax\n\t"
11854        $$emit$$"# L_zero_64_bytes:\n\t"
11855        $$emit$$"sub     0x8,rcx\n\t"
11856        $$emit$$"jge     L_loop\n\t"
11857        $$emit$$"add     0x4,rcx\n\t"
11858        $$emit$$"jl      L_tail\n\t"
11859        $$emit$$"vmovdqu ymm0,(rax)\n\t"
11860        $$emit$$"add     0x20,rax\n\t"
11861        $$emit$$"sub     0x4,rcx\n\t"
11862        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11863        $$emit$$"add     0x4,rcx\n\t"
11864        $$emit$$"jle     L_end\n\t"
11865        $$emit$$"dec     rcx\n\t"
11866        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11867        $$emit$$"vmovq   xmm0,(rax)\n\t"
11868        $$emit$$"add     0x8,rax\n\t"
11869        $$emit$$"dec     rcx\n\t"
11870        $$emit$$"jge     L_sloop\n\t"
11871        $$emit$$"# L_end:\n\t"
11872     } else {
11873        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
11874        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
11875     }
11876   %}
11877   ins_encode %{
11878     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11879                  $tmp$$XMMRegister, true, true, $ktmp$$KRegister);
11880   %}
11881   ins_pipe(pipe_slow);
11882 %}
11883 
11884 // Small ClearArray AVX512 constant length.
// Selected when the ClearArray node is not is_large(), is not word_copy_only(),
// UseAVX > 2 and VM_Version::supports_avx512vlbw() holds. The length is an
// immediate (immL cnt) and is handed to clear_mem() as $cnt$$constant; base may
// be any pointer register (rRegP) and is not listed in effect(), val is
// consumed (USE_KILL), tmp and ktmp are scratch registers, flags are killed.
// ins_cost(100) is lower than the 125 used by the non-constant small rules in
// this file.
// NOTE(review): presumably this makes the matcher prefer this rule when the
// length is constant - confirm.
11885 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rax_RegL val, kReg ktmp, Universe dummy, rFlagsReg cr)
11886 %{
11887   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only() &&
11888             ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11889   match(Set dummy (ClearArray (Binary cnt base) val));
11890   ins_cost(100);
11891   effect(TEMP tmp, USE_KILL val, TEMP ktmp, KILL cr);
11892   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11893   ins_encode %{
11894     __ clear_mem($base$$Register, $cnt$$constant, $val$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11895   %}
11896   ins_pipe(pipe_slow);
11897 %}
11898 
// StrComp for the StrIntrinsicNode::LL encoding on CPUs where
// VM_Version::supports_avx512vlbw() is false.
// Operand classes: str1 rdi_RegP, cnt1 rcx_RegI, str2 rsi_RegP, cnt2 rdx_RegI,
// result rax_RegI. All four inputs are consumed (USE_KILL), tmp1 is a scratch
// XMM register and the flags are killed.
// knoreg is passed in the last argument, where string_compareL_evex passes its
// $ktmp opmask register; this variant reserves no kReg.
11899 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11900                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11901 %{
11902   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11903   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11904   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11905
11906   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11907   ins_encode %{
11908     __ string_compare($str1$$Register, $str2$$Register,
11909                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11910                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11911   %}
11912   ins_pipe( pipe_slow );
11913 %}
11914 
// StrComp for the StrIntrinsicNode::LL encoding on CPUs where
// VM_Version::supports_avx512vlbw() is true.
// Same operand layout as string_compareL, plus a scratch opmask register
// (kReg ktmp, TEMP) that is passed as the last argument to string_compare().
// All four inputs are consumed (USE_KILL); tmp1 is a scratch XMM register and
// the flags are killed.
11915 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11916                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
11917 %{
11918   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11919   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11920   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11921
11922   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11923   ins_encode %{
11924     __ string_compare($str1$$Register, $str2$$Register,
11925                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11926                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11927   %}
11928   ins_pipe( pipe_slow );
11929 %}
11930 
// StrComp for the StrIntrinsicNode::UU encoding on CPUs where
// VM_Version::supports_avx512vlbw() is false.
// Operand classes: str1 rdi_RegP, cnt1 rcx_RegI, str2 rsi_RegP, cnt2 rdx_RegI,
// result rax_RegI. All four inputs are consumed (USE_KILL), tmp1 is a scratch
// XMM register and the flags are killed.
// knoreg is passed in the last argument, where string_compareU_evex passes its
// $ktmp opmask register; this variant reserves no kReg.
11931 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11932                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
11933 %{
11934   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11935   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11936   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11937
11938   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11939   ins_encode %{
11940     __ string_compare($str1$$Register, $str2$$Register,
11941                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11942                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11943   %}
11944   ins_pipe( pipe_slow );
11945 %}
11946 
// StrComp for the StrIntrinsicNode::UU encoding on CPUs where
// VM_Version::supports_avx512vlbw() is true.
// Same operand layout as string_compareU, plus a scratch opmask register
// (kReg ktmp, TEMP) that is passed as the last argument to string_compare().
// All four inputs are consumed (USE_KILL); tmp1 is a scratch XMM register and
// the flags are killed.
11947 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11948                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
11949 %{
11950   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11951   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11952   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11953
11954   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11955   ins_encode %{
11956     __ string_compare($str1$$Register, $str2$$Register,
11957                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11958                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11959   %}
11960   ins_pipe( pipe_slow );
11961 %}
11962 
// StrComp for the StrIntrinsicNode::LU encoding on CPUs where
// VM_Version::supports_avx512vlbw() is false.
// Operand classes: str1 rdi_RegP, cnt1 rcx_RegI, str2 rsi_RegP, cnt2 rdx_RegI,
// result rax_RegI. All four inputs are consumed (USE_KILL), tmp1 is a scratch
// XMM register and the flags are killed.
// knoreg is passed in the last argument, where string_compareLU_evex passes
// its $ktmp opmask register; this variant reserves no kReg.
11963 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11964                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
11965 %{
11966   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11967   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11968   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11969
11970   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11971   ins_encode %{
11972     __ string_compare($str1$$Register, $str2$$Register,
11973                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11974                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11975   %}
11976   ins_pipe( pipe_slow );
11977 %}
11978 
// StrComp for the StrIntrinsicNode::LU encoding on CPUs where
// VM_Version::supports_avx512vlbw() is true.
// Same operand layout as string_compareLU, plus a scratch opmask register
// (kReg ktmp, TEMP) that is passed as the last argument to string_compare().
// All four inputs are consumed (USE_KILL); tmp1 is a scratch XMM register and
// the flags are killed.
11979 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11980                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
11981 %{
11982   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11983   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11984   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11985
11986   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11987   ins_encode %{
11988     __ string_compare($str1$$Register, $str2$$Register,
11989                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11990                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11991   %}
11992   ins_pipe( pipe_slow );
11993 %}
11994 
// StrComp for the StrIntrinsicNode::UL encoding on CPUs where
// VM_Version::supports_avx512vlbw() is false.
// Unlike the LL/UU/LU rules, the operand classes are swapped here: str1 is
// rsi_RegP, cnt1 is rdx_RegI, str2 is rdi_RegP and cnt2 is rcx_RegI, and the
// call passes str2/cnt2 ahead of str1/cnt1. The net effect is that
// string_compare() still receives rdi, rsi, rcx, rdx in that order.
// All four inputs are consumed (USE_KILL), tmp1 is a scratch XMM register,
// flags are killed, and knoreg is passed because no kReg is reserved.
11995 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
11996                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
11997 %{
11998   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11999   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12000   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12001
12002   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12003   ins_encode %{
12004     __ string_compare($str2$$Register, $str1$$Register,
12005                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
12006                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
12007   %}
12008   ins_pipe( pipe_slow );
12009 %}
12010 
// StrComp for the StrIntrinsicNode::UL encoding on CPUs where
// VM_Version::supports_avx512vlbw() is true.
// As in string_compareUL, the operand classes are swapped (str1 rsi_RegP,
// cnt1 rdx_RegI, str2 rdi_RegP, cnt2 rcx_RegI) and the call passes str2/cnt2
// ahead of str1/cnt1, so string_compare() still receives rdi, rsi, rcx, rdx.
// A scratch opmask register (kReg ktmp, TEMP) is passed as the last argument.
// All four inputs are consumed (USE_KILL); tmp1 is a scratch XMM register and
// the flags are killed.
12011 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
12012                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
12013 %{
12014   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
12015   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12016   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12017
12018   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12019   ins_encode %{
12020     __ string_compare($str2$$Register, $str1$$Register,
12021                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
12022                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
12023   %}
12024   ins_pipe( pipe_slow );
12025 %}
12026 
12027 // fast search of substring with known size.
// StrIndexOf for the StrIntrinsicNode::LL encoding when the substring length
// is an immediate (immI int_cnt2); requires UseSSE42Intrinsics.
// str1, str2 and cnt1 are consumed (USE_KILL). cnt2 (rax_RegI) and tmp
// (rcx_RegI) are not inputs of the match rule; they are only KILLed and handed
// to the assembler routine as registers it may use. tmp_vec is a scratch XMM
// register, result is rbx_RegI and the flags are killed.
// The constant length selects the routine: string_indexofC8 for 16 or more
// elements, string_indexof otherwise.
12028 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
12029                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
12030 %{
12031   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12032   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12033   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12034
12035   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
12036   ins_encode %{
12037     int icnt2 = (int)$int_cnt2$$constant;
12038     if (icnt2 >= 16) {
12039       // IndexOf for constant substrings with size >= 16 elements
12040       // which don't need to be loaded through stack.
12041       __ string_indexofC8($str1$$Register, $str2$$Register,
12042                           $cnt1$$Register, $cnt2$$Register,
12043                           icnt2, $result$$Register,
12044                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12045     } else {
12046       // Small strings are loaded through stack if they cross page boundary.
12047       __ string_indexof($str1$$Register, $str2$$Register,
12048                         $cnt1$$Register, $cnt2$$Register,
12049                         icnt2, $result$$Register,
12050                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12051     }
12052   %}
12053   ins_pipe( pipe_slow );
12054 %}
12055 
12056 // fast search of substring with known size.
// StrIndexOf for the StrIntrinsicNode::UU encoding when the substring length
// is an immediate (immI int_cnt2); requires UseSSE42Intrinsics.
// str1, str2 and cnt1 are consumed (USE_KILL). cnt2 (rax_RegI) and tmp
// (rcx_RegI) are not inputs of the match rule; they are only KILLed and handed
// to the assembler routine as registers it may use. tmp_vec is a scratch XMM
// register, result is rbx_RegI and the flags are killed.
// The constant length selects the routine: string_indexofC8 for 8 or more
// elements (the LL rule uses 16), string_indexof otherwise.
12057 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
12058                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
12059 %{
12060   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12061   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12062   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12063
12064   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
12065   ins_encode %{
12066     int icnt2 = (int)$int_cnt2$$constant;
12067     if (icnt2 >= 8) {
12068       // IndexOf for constant substrings with size >= 8 elements
12069       // which don't need to be loaded through stack.
12070       __ string_indexofC8($str1$$Register, $str2$$Register,
12071                           $cnt1$$Register, $cnt2$$Register,
12072                           icnt2, $result$$Register,
12073                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12074     } else {
12075       // Small strings are loaded through stack if they cross page boundary.
12076       __ string_indexof($str1$$Register, $str2$$Register,
12077                         $cnt1$$Register, $cnt2$$Register,
12078                         icnt2, $result$$Register,
12079                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12080     }
12081   %}
12082   ins_pipe( pipe_slow );
12083 %}
12084 
12085 // fast search of substring with known size.
// StrIndexOf for the StrIntrinsicNode::UL encoding when the substring length
// is an immediate (immI int_cnt2); requires UseSSE42Intrinsics.
// str1, str2 and cnt1 are consumed (USE_KILL). cnt2 (rax_RegI) and tmp
// (rcx_RegI) are not inputs of the match rule; they are only KILLed and handed
// to the assembler routine as registers it may use. tmp_vec is a scratch XMM
// register, result is rbx_RegI and the flags are killed.
// The constant length selects the routine: string_indexofC8 for 8 or more
// elements, string_indexof otherwise.
12086 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
12087                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
12088 %{
12089   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12090   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12091   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12092
12093   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
12094   ins_encode %{
12095     int icnt2 = (int)$int_cnt2$$constant;
12096     if (icnt2 >= 8) {
12097       // IndexOf for constant substrings with size >= 8 elements
12098       // which don't need to be loaded through stack.
12099       __ string_indexofC8($str1$$Register, $str2$$Register,
12100                           $cnt1$$Register, $cnt2$$Register,
12101                           icnt2, $result$$Register,
12102                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12103     } else {
12104       // Small strings are loaded through stack if they cross page boundary.
12105       __ string_indexof($str1$$Register, $str2$$Register,
12106                         $cnt1$$Register, $cnt2$$Register,
12107                         icnt2, $result$$Register,
12108                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12109     }
12110   %}
12111   ins_pipe( pipe_slow );
12112 %}
12113 
// StrIndexOf for the StrIntrinsicNode::LL encoding when the substring length
// is in a register (rax_RegI cnt2); requires UseSSE42Intrinsics.
// str1, str2, cnt1 and cnt2 are all consumed (USE_KILL); tmp (rcx_RegI) is
// KILLed, tmp_vec is a scratch XMM register, result is rbx_RegI and the flags
// are killed.
// (-1) is passed where the constant-length rules pass icnt2.
// NOTE(review): presumably -1 tells string_indexof() the length is not a
// compile-time constant - confirm against the MacroAssembler routine.
12114 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
12115                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
12116 %{
12117   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12118   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12119   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12120
12121   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12122   ins_encode %{
12123     __ string_indexof($str1$$Register, $str2$$Register,
12124                       $cnt1$$Register, $cnt2$$Register,
12125                       (-1), $result$$Register,
12126                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12127   %}
12128   ins_pipe( pipe_slow );
12129 %}
12130 
// StrIndexOf for the StrIntrinsicNode::UU encoding when the substring length
// is in a register (rax_RegI cnt2); requires UseSSE42Intrinsics.
// str1, str2, cnt1 and cnt2 are all consumed (USE_KILL); tmp (rcx_RegI) is
// KILLed, tmp_vec is a scratch XMM register, result is rbx_RegI and the flags
// are killed.
// (-1) is passed where the constant-length rules pass icnt2.
// NOTE(review): presumably -1 tells string_indexof() the length is not a
// compile-time constant - confirm against the MacroAssembler routine.
12131 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
12132                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
12133 %{
12134   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12135   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12136   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12137
12138   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12139   ins_encode %{
12140     __ string_indexof($str1$$Register, $str2$$Register,
12141                       $cnt1$$Register, $cnt2$$Register,
12142                       (-1), $result$$Register,
12143                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12144   %}
12145   ins_pipe( pipe_slow );
12146 %}
12147 
// StrIndexOf for the StrIntrinsicNode::UL encoding when the substring length
// is in a register (rax_RegI cnt2); requires UseSSE42Intrinsics.
// str1, str2, cnt1 and cnt2 are all consumed (USE_KILL); tmp (rcx_RegI) is
// KILLed, tmp_vec is a scratch XMM register, result is rbx_RegI and the flags
// are killed.
// (-1) is passed where the constant-length rules pass icnt2.
// NOTE(review): presumably -1 tells string_indexof() the length is not a
// compile-time constant - confirm against the MacroAssembler routine.
12148 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
12149                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
12150 %{
12151   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12152   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12153   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12154
12155   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12156   ins_encode %{
12157     __ string_indexof($str1$$Register, $str2$$Register,
12158                       $cnt1$$Register, $cnt2$$Register,
12159                       (-1), $result$$Register,
12160                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12161   %}
12162   ins_pipe( pipe_slow );
12163 %}
12164 
// StrIndexOfChar for the StrIntrinsicNode::U encoding; requires
// UseSSE42Intrinsics. Emits MacroAssembler::string_indexof_char().
// str1 (rdi_RegP), cnt1 (rdx_RegI) and ch (rax_RegI) are consumed (USE_KILL);
// three scratch XMM registers and tmp (rcx_RegI) are TEMPs, result is
// rbx_RegI and the flags are killed.
12165 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
12166                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
12167 %{
12168   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12169   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12170   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12171   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12172   ins_encode %{
12173     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12174                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
12175   %}
12176   ins_pipe( pipe_slow );
12177 %}
12178 
// StrIndexOfChar for the StrIntrinsicNode::L encoding; requires
// UseSSE42Intrinsics. Emits MacroAssembler::stringL_indexof_char().
// str1 (rdi_RegP), cnt1 (rdx_RegI) and ch (rax_RegI) are consumed (USE_KILL);
// three scratch XMM registers and tmp (rcx_RegI) are TEMPs, result is
// rbx_RegI and the flags are killed.
12179 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
12180                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
12181 %{
12182   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12183   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12184   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12185   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12186   ins_encode %{
12187     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12188                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
12189   %}
12190   ins_pipe( pipe_slow );
12191 %}
12192 
12193 // fast string equals
// StrEquals on CPUs where VM_Version::supports_avx512vlbw() is false.
// str1 (rdi_RegP), str2 (rsi_RegP) and cnt (rcx_RegI) are consumed (USE_KILL);
// tmp1/tmp2 are scratch XMM registers, tmp3 (rbx_RegI) is KILLed, result is
// rax_RegI and the flags are killed.
// arrays_equals() is called with a leading `false` (the array rules in this
// file pass `true`) and with knoreg, since no kReg is reserved here.
// NOTE(review): the leading flag presumably selects array vs. raw-string
// comparison - confirm against MacroAssembler::arrays_equals.
12194 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
12195                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
12196 %{
12197   predicate(!VM_Version::supports_avx512vlbw());
12198   match(Set result (StrEquals (Binary str1 str2) cnt));
12199   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12200
12201   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
12202   ins_encode %{
12203     __ arrays_equals(false, $str1$$Register, $str2$$Register,
12204                      $cnt$$Register, $result$$Register, $tmp3$$Register,
12205                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12206   %}
12207   ins_pipe( pipe_slow );
12208 %}
12209 
// StrEquals on CPUs where VM_Version::supports_avx512vlbw() is true.
// Same operand layout as string_equals, plus a scratch opmask register
// (kReg ktmp, TEMP) that is passed as the last argument to arrays_equals().
// str1, str2 and cnt are consumed (USE_KILL); tmp1/tmp2 are scratch XMM
// registers, tmp3 (rbx_RegI) is KILLed and the flags are killed.
12210 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
12211                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
12212 %{
12213   predicate(VM_Version::supports_avx512vlbw());
12214   match(Set result (StrEquals (Binary str1 str2) cnt));
12215   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12216
12217   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
12218   ins_encode %{
12219     __ arrays_equals(false, $str1$$Register, $str2$$Register,
12220                      $cnt$$Register, $result$$Register, $tmp3$$Register,
12221                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12222   %}
12223   ins_pipe( pipe_slow );
12224 %}
12225 
12226 // fast array equals
// AryEq for the StrIntrinsicNode::LL encoding (byte[] per the format text) on
// CPUs where VM_Version::supports_avx512vlbw() is false.
// ary1 (rdi_RegP) and ary2 (rsi_RegP) are consumed (USE_KILL). The match rule
// has no length input: tmp3 (rcx_RegI) is passed in the argument slot where
// string_equals passes cnt, and is KILLed together with tmp4 (rbx_RegI).
// tmp1/tmp2 are scratch XMM registers, result is rax_RegI, flags are killed.
// arrays_equals() is called with a leading `true`, `false /* char */`, and
// knoreg, since no kReg is reserved here.
12227 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
12228                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
12229 %{
12230   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12231   match(Set result (AryEq ary1 ary2));
12232   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12233
12234   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12235   ins_encode %{
12236     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12237                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12238                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12239   %}
12240   ins_pipe( pipe_slow );
12241 %}
12242 
// AryEq for the StrIntrinsicNode::LL encoding (byte[] per the format text) on
// CPUs where VM_Version::supports_avx512vlbw() is true.
// Same operand layout as array_equalsB, plus a scratch opmask register
// (kReg ktmp, TEMP) that is passed as the last argument to arrays_equals().
// ary1 and ary2 are consumed (USE_KILL); tmp3 (rcx_RegI) and tmp4 (rbx_RegI)
// are KILLed, tmp1/tmp2 are scratch XMM registers and the flags are killed.
12243 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
12244                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
12245 %{
12246   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12247   match(Set result (AryEq ary1 ary2));
12248   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12249
12250   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12251   ins_encode %{
12252     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12253                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12254                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12255   %}
12256   ins_pipe( pipe_slow );
12257 %}
12258 
// AryEq rule for the UU encoding when AVX512VL+BW is NOT available
// (see predicate). Differs from array_equalsB only in the predicate's
// encoding test and in passing true for the "char" argument of arrays_equals.
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12274 
// AryEq rule for the UU encoding when VM_Version::supports_avx512vlbw() is
// true. Same as array_equalsC, but a kReg temporary ($ktmp) is allocated and
// passed to arrays_equals instead of knoreg.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12290 
// CountPositives rule used when either AVX512VL+BW or BMI2 is missing
// (see predicate); its complement is count_positives_evex. No opmask
// registers are used: knoreg is passed twice to count_positives().
// ary1 and len are USE_KILL; tmp3 and the flags are KILLed.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12306 
// CountPositives rule used when both AVX512VL+BW and BMI2 are available
// (see predicate); its complement is count_positives. Two kReg temporaries
// are allocated and passed to count_positives().
// ary1 and len are USE_KILL; tmp3 and the flags are KILLed.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12322 
12323 // fast char[] to byte[] compression
// StrCompressedCopy rule used when either AVX512VL+BW or BMI2 is missing
// (see predicate). Calls char_array_compress() with knoreg for both opmask
// arguments. src, dst and len are USE_KILL; tmp5 and the flags are KILLed.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12340 
// StrCompressedCopy rule used when both AVX512VL+BW and BMI2 are available
// (see predicate). Same call as string_compress, but two kReg temporaries
// are allocated and passed to char_array_compress() instead of knoreg.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12357 // fast byte[] to char[] inflation
// StrInflatedCopy rule used when either AVX512VL+BW or BMI2 is missing
// (see predicate). The node's result is bound to a Universe "dummy" operand;
// no register result is produced by the encoding. Calls byte_array_inflate()
// with knoreg. Note tmp2 (rcx_RegI) is declared TEMP here, not KILL.
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12371 
// StrInflatedCopy rule used when both AVX512VL+BW and BMI2 are available
// (see predicate). Same as string_inflate, but a kReg temporary ($ktmp) is
// allocated and passed to byte_array_inflate() instead of knoreg.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12385 
12386 // encode char[] to byte[] in ISO_8859_1
// EncodeISOArray rule for nodes whose is_ascii() is false (see predicate).
// Calls encode_iso_array() with false as the final argument; the ASCII
// counterpart is encode_ascii_array. src, dst and len are USE_KILL.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}
12402 
12403 // encode char[] to byte[] in ASCII
// EncodeISOArray rule for nodes whose is_ascii() is true (see predicate).
// Uses the same encode_iso_array() helper as encode_iso_array, but passes
// true as the final argument.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}
12419 
12420 //----------Overflow Math Instructions-----------------------------------------
12421 
// OverflowAddI, register + register: performs the addl so the flags (cr)
// carry the result. op1 is USE_KILL because the addl writes into it.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12434 
// OverflowAddI, register + int immediate: same as overflowAddI_rReg but the
// second addl operand is the constant. op1 is USE_KILL.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12447 
// OverflowAddL, register + register: performs the addq so the flags (cr)
// carry the result. op1 is USE_KILL because the addq writes into it.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12459 
// OverflowAddL, register + immL32 constant: addq with an immediate.
// op1 is USE_KILL.
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12471 
// OverflowSubI, register - register: implemented with cmpl, so unlike the
// add/mul overflow rules no operand is declared killed and op1 may be any
// rRegI.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12482 
// OverflowSubI, register - int immediate: cmpl against the constant; no
// operand is declared killed.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12493 
// OverflowSubL, register - register: implemented with cmpq; no operand is
// declared killed.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12504 
// OverflowSubL, register - immL32 constant: cmpq against the constant; no
// operand is declared killed.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12515 
// Special case of OverflowSubI whose first input is the constant 0
// (immI_0): emitted as negl on op2, which is therefore USE_KILL.
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl    $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12527 
// Special case of OverflowSubL whose first input is the constant 0
// (immL0): emitted as negq on op2, which is therefore USE_KILL.
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq    $op2\t# overflow check long" %}
  ins_encode %{
    __ negq($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12539 
// OverflowMulI, register * register: two-operand imull writes the product
// into op1, so op1 is USE_KILL.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12551 
// OverflowMulI, register * int immediate: three-operand imull writes the
// product into the TEMP register tmp, so op1 is only USEd (not killed).
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12563 
// OverflowMulL, register * register: two-operand imulq writes the product
// into op1, so op1 is USE_KILL.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12575 
// OverflowMulL, register * immL32 constant: three-operand imulq writes the
// product into the TEMP register tmp, so op1 is only USEd (not killed).
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
12587 
12588 
12589 //----------Control Flow Instructions------------------------------------------
12590 // Signed compare Instructions
12591 
12592 // XXX more variants!!
// Signed int compare, register vs register (CmpI -> cmpl).
// Besides being matched directly, this rule is instantiated by the expand
// blocks of minI_rReg and maxI_rReg in this file; it is the only compare
// rule here that spells out its effect list explicitly.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12604 
// Signed int compare, register vs int immediate (CmpI -> cmpl reg, imm).
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12615 
// Signed int compare with the second input folded from a LoadI
// (CmpI op1 (LoadI op2) -> cmpl reg, mem). Carries an explicit cost of 500.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12627 
// Int compare against the constant 0 (immI_0): emitted as testl src, src
// instead of a cmpl with an immediate.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl   $src, $src" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12638 
// Folds (src & con) compared with 0 into a single testl src, con; the AndI
// result is not materialized in a register.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  ins_encode %{
    __ testl($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12649 
// Folds (src & LoadI mem) compared with 0 into a single testl src, mem.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  ins_encode %{
    __ testl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12660 
12661 // Unsigned compare Instructions; really, same as signed except they
12662 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare, register vs register: same cmpl as compI_rReg, but
// the result operand is rFlagsRegU.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12673 
// Unsigned int compare, register vs int immediate (CmpU -> cmpl reg, imm),
// producing rFlagsRegU.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12684 
// Unsigned int compare with the second input folded from a LoadI
// (CmpU op1 (LoadI op2) -> cmpl reg, mem). Carries an explicit cost of 500.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12696 
12697 // // // Cisc-spilled version of cmpU_rReg
12698 // //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
12699 // //%{
12700 // //  match(Set cr (CmpU (LoadI op1) op2));
12701 // //
12702 // //  format %{ "CMPu   $op1,$op2" %}
12703 // //  ins_cost(500);
12704 // //  opcode(0x39);  /* Opcode 39 /r */
12705 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12706 // //%}
12707 
// Unsigned int compare against the constant 0 (immI_0): emitted as
// testl src, src, producing rFlagsRegU.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl   $src, $src\t# unsigned" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12718 
// Pointer compare, register vs register (CmpP -> cmpq), producing
// rFlagsRegU.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12729 
// Pointer compare with the second input folded from a LoadP
// (cmpq reg, mem). Only applies when the load carries no barrier data
// (barrier_data() == 0). Carries an explicit cost of 500.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12742 
12743 // // // Cisc-spilled version of cmpP_rReg
12744 // //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
12745 // //%{
12746 // //  match(Set cr (CmpP (LoadP op1) op2));
12747 // //
12748 // //  format %{ "CMPu   $op1,$op2" %}
12749 // //  ins_cost(500);
12750 // //  opcode(0x39);  /* Opcode 39 /r */
12751 // //  ins_encode( OpcP, reg_mem( op1, op2) );
12752 // //%}
12753 
12754 // XXX this is generalized by compP_rReg_mem???
12755 // Compare raw pointer (used in out-of-heap check).
12756 // Only works because non-oop pointers must be raw pointers
12757 // and raw pointers have no anti-dependencies.
// Same match rule and encoding as compP_rReg_mem, but additionally requires
// the loaded value's type to have reloc() == relocInfo::none, and declares
// no explicit ins_cost.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12770 
12771 // This will generate a signed flags result. This should be OK since
12772 // any compare to a zero should be eq/neq.
// Pointer compare against null (immP0): emitted as testq src, src. Note the
// result operand is rFlagsReg (signed), unlike the other CmpP rules.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq   $src, $src\t# ptr" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12783 
12784 // This will generate a signed flags result. This should be OK since
12785 // any compare to a zero should be eq/neq.
// Null check of a pointer loaded from memory, without loading it into a
// register. Applies when compressed oops are off or the compressed-oops
// base is non-NULL, and the load has no barrier data; the complementary
// case is testP_mem_reg0.
// NOTE(review): the encoding passes 0xFFFFFFFF while the format prints
// 0xffffffffffffffff — presumably the imm32 is sign-extended by testq;
// confirm against the assembler.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12799 
// Null check of a pointer loaded from memory when compressed oops are on
// and the compressed-oops base is NULL: compares the memory operand with
// R12, which the format string documents as holding 0 in that mode
// (R12_heapbase==0). Requires the load to have no barrier data.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpq(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12812 
// Compressed-pointer compare, register vs register (CmpN -> cmpl),
// producing rFlagsRegU.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
12821 
// Compressed-pointer compare with the second input folded from a LoadN
// (cmpl reg, mem).
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12832 
// Compressed-pointer compare of a register against a narrow-oop constant
// (immN); the constant is handed to cmp_narrow_oop() as a jobject.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12842 
// Compressed-pointer compare of a narrow-oop constant (immN) against a
// LoadN, emitted as cmp_narrow_oop(mem, constant).
// NOTE(review): the match rule has the constant as the first CmpN input
// while the emitted compare has the memory operand first — presumably only
// eq/ne consumers use this result; confirm.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12853 
// Compressed-pointer compare of a register against a narrow-klass constant
// (immNKlass); the constant is handed to cmp_narrow_klass() as a Klass*.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12863 
// Compressed-pointer compare of a narrow-klass constant (immNKlass)
// against a LoadNKlass, emitted as cmp_narrow_klass(mem, constant).
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12874 
// Compressed-pointer compare against narrow null (immN0): emitted as
// testl src, src.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
12882 
// Null check of a compressed pointer loaded from memory, used when the
// compressed-oops base is non-NULL (the base == NULL case is handled by
// testN_mem_reg0, which compares against R12).
//
// The rule matches a compare of the loaded narrow oop with zero (immN0), so
// the flags must report "equal" exactly when the 32-bit memory value is 0.
// The memory operand is therefore compared with the immediate 0; comparing
// with 0xFFFFFFFF would instead report "equal" for the value -1.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "cmpl    $mem, 0\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, 0);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12895 
// Null check of a compressed pointer loaded from memory when the
// compressed-oops base is NULL: compares the memory operand with R12, which
// the format string documents as holding 0 in that mode (R12_heapbase==0).
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpl(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12907 
12908 // Yanked all unsigned pointer compare operations.
12909 // Pointer compares are done with CmpP which is already unsigned.
12910 
// Signed long compare, register vs register (CmpL -> cmpq).
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
12921 
// Signed long compare, register vs immL32 constant (CmpL -> cmpq reg, imm).
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12932 
// Signed long compare with the second input folded from a LoadL
// (cmpq reg, mem). Unlike compI_rReg_mem, no explicit ins_cost is declared.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12943 
// Long compare against the constant 0 (immL0): emitted as testq src, src.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12954 
// Folds (src & con) compared with 0 into a single testq src, con, where con
// is an immL32 constant.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  ins_encode %{
    __ testq($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
12965 
// Folds (src & LoadL mem) compared with 0 into a single testq src, mem.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12976 
// Variant of testL_reg_mem where the register input is a pointer viewed as
// a long via CastP2X; the cast is absorbed and the pointer register is used
// directly in testq src, mem.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
12987 
12988 // Manifest a CmpU result in an integer register.  Very painful.
12989 // This is the test to avoid.
// Three-way unsigned int compare (CmpU3) materialized in $dst:
//   -1 when src1 is below src2 (unsigned), otherwise the setne/movzbl pair
//   leaves 0 when the operands are equal and 1 when they differ.
// The flags register is clobbered (KILL flags).
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    // Preload the "below" answer; the branch that follows still tests the
    // flags produced by the cmpl above.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setne($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
13013 
13014 // Manifest a CmpL result in an integer register.  Very painful.
13015 // This is the test to avoid.
// Three-way signed long compare (CmpL3) materialized in $dst:
//   -1 when src1 is less than src2 (signed), otherwise the setne/movzbl pair
//   leaves 0 when the operands are equal and 1 when they differ.
// The flags register is clobbered (KILL flags).
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setne($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
13039 
13040 // Manifest a CmpUL result in an integer register.  Very painful.
13041 // This is the test to avoid.
// Three-way unsigned long compare (CmpUL3) materialized in $dst:
//   -1 when src1 is below src2 (unsigned), otherwise the setne/movzbl pair
//   leaves 0 when the operands are equal and 1 when they differ.
// The flags register is clobbered (KILL flags).
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // Preload the "below" answer; the branch that follows still tests the
    // flags produced by the cmpq above.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setne($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
13065 
13066 // Unsigned long compare Instructions; really, same as signed long except they
13067 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned long compare, register vs register: same cmpq as compL_rReg, but
// the result operand is rFlagsRegU.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
13078 
// Unsigned long compare, register vs immL32 constant (cmpq reg, imm),
// producing rFlagsRegU.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
13089 
// Unsigned long compare with the second input folded from a LoadL
// (cmpq reg, mem), producing rFlagsRegU.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
13100 
// Unsigned long compare against the constant 0 (immL0): emitted as
// testq src, src, producing rFlagsRegU.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
13111 
// Compares a signed byte in memory with an 8-bit immediate directly
// (CmpI (LoadB mem) imm -> cmpb mem, imm), avoiding a separate load.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb    $mem, $imm" %}
  ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
13121 
// Folds ((LoadUB mem) & imm) compared with 0 into a single testb mem, imm.
// The mask operand is restricted to the immU7 class.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# ubyte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
13131 
13132 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
13133 %{
        // Signed-byte counterpart of testUB_mem_imm: matches
        // ((LoadB mem) & imm) compared with the immI_0 operand, using an immI8
        // mask, and emits the same single testb on the memory operand.
13134   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
13135 
13136   ins_cost(125);
13137   format %{ "testb   $mem, $imm\t# byte" %}
13138   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
13139   ins_pipe(ialu_cr_reg_mem);
13140 %}
13141 
13142 //----------Max and Min--------------------------------------------------------
13143 // Min Instructions
13144 
13145 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
13146 %{
        // Building block for MinI.  It has no match rule of its own; within
        // this section it is instantiated by the expand rule of minI_rReg.
        // Copies src into dst when the flags in cr satisfy "greater", so dst
        // is both read and written (USE_DEF).
13147   effect(USE_DEF dst, USE src, USE cr);
13148 
13149   format %{ "cmovlgt $dst, $src\t# min" %}
13150   ins_encode %{
13151     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
13152   %}
13153   ins_pipe(pipe_cmov_reg);
13154 %}
13155 
13156 
13157 instruct minI_rReg(rRegI dst, rRegI src)
13158 %{
        // Integer minimum, computed in place in dst.  The rule has no encoding
        // of its own: it expands into a compare of dst with src followed by a
        // conditional move that replaces dst with src when dst was greater.
13159   match(Set dst (MinI dst src));
13160 
13161   ins_cost(200);
13162   expand %{
          // Temporary flags operand that links the compare to the cmov.
13163     rFlagsReg cr;
13164     compI_rReg(cr, dst, src);
13165     cmovI_reg_g(dst, src, cr);
13166   %}
13167 %}
13168 
13169 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
13170 %{
        // Building block for MaxI.  It has no match rule of its own; within
        // this section it is instantiated by the expand rule of maxI_rReg.
        // Copies src into dst when the flags in cr satisfy "less", so dst is
        // both read and written (USE_DEF).
13171   effect(USE_DEF dst, USE src, USE cr);
13172 
13173   format %{ "cmovllt $dst, $src\t# max" %}
13174   ins_encode %{
13175     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
13176   %}
13177   ins_pipe(pipe_cmov_reg);
13178 %}
13179 
13180 
13181 instruct maxI_rReg(rRegI dst, rRegI src)
13182 %{
        // Integer maximum, computed in place in dst.  The rule has no encoding
        // of its own: it expands into a compare of dst with src followed by a
        // conditional move that replaces dst with src when dst was less.
13183   match(Set dst (MaxI dst src));
13184 
13185   ins_cost(200);
13186   expand %{
          // Temporary flags operand that links the compare to the cmov.
13187     rFlagsReg cr;
13188     compI_rReg(cr, dst, src);
13189     cmovI_reg_l(dst, src, cr);
13190   %}
13191 %}
13192 
13193 // ============================================================================
13194 // Branch Instructions
13195 
13196 // Jump Direct - Label defines a relative address from JMP+1
      // Long form of the unconditional branch matched for Goto.  The declared
      // size is 5 bytes and the encoding passes maybe_short = false so the
      // assembler never shrinks it; jmpDir_short is the 2-byte replacement.
13197 instruct jmpDir(label labl)
13198 %{
13199   match(Goto);
13200   effect(USE labl);
13201 
13202   ins_cost(300);
13203   format %{ "jmp     $labl" %}
13204   size(5);
13205   ins_encode %{
13206     Label* L = $labl$$label;
13207     __ jmp(*L, false); // Always long jump
13208   %}
13209   ins_pipe(pipe_jmp);
13210 %}
13211 
13212 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Long form of the conditional branch matched for If on the signed
      // flags operand.  The condition code comes from the cmpOp operand; the
      // declared size is 6 bytes and the jcc is forced to its long encoding.
13213 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
13214 %{
13215   match(If cop cr);
13216   effect(USE labl);
13217 
13218   ins_cost(300);
13219   format %{ "j$cop     $labl" %}
13220   size(6);
13221   ins_encode %{
13222     Label* L = $labl$$label;
13223     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13224   %}
13225   ins_pipe(pipe_jcc);
13226 %}
13227 
13228 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Same encoding as jmpCon, but matched for CountedLoopEnd instead of
      // If; the format string tags the branch as a loop end.
13229 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
13230 %{
13231   match(CountedLoopEnd cop cr);
13232   effect(USE labl);
13233 
13234   ins_cost(300);
13235   format %{ "j$cop     $labl\t# loop end" %}
13236   size(6);
13237   ins_encode %{
13238     Label* L = $labl$$label;
13239     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13240   %}
13241   ins_pipe(pipe_jcc);
13242 %}
13243 
13244 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Unsigned variant of jmpLoopEnd: takes a cmpOpU condition and the
      // rFlagsRegU flags operand; the emitted long jcc is otherwise the same.
13245 instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13246   match(CountedLoopEnd cop cmp);
13247   effect(USE labl);
13248 
13249   ins_cost(300);
13250   format %{ "j$cop,u   $labl\t# loop end" %}
13251   size(6);
13252   ins_encode %{
13253     Label* L = $labl$$label;
13254     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13255   %}
13256   ins_pipe(pipe_jcc);
13257 %}
13258 
13259 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // CountedLoopEnd branch for the cmpOpUCF / rFlagsRegUCF operand pair.
        // Emits the same single long jcc as jmpLoopEndU but is costed at 200
        // rather than 300.
        // NOTE(review): the lower cost presumably makes the matcher prefer
        // this rule when both apply - confirm against the operand definitions.
13260   match(CountedLoopEnd cop cmp);
13261   effect(USE labl);
13262 
13263   ins_cost(200);
13264   format %{ "j$cop,u   $labl\t# loop end" %}
13265   size(6);
13266   ins_encode %{
13267     Label* L = $labl$$label;
13268     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13269   %}
13270   ins_pipe(pipe_jcc);
13271 %}
13272 
13273 // Jump Direct Conditional - using unsigned comparison
      // Long form matched for If with a cmpOpU condition on the rFlagsRegU
      // flags operand; 6 bytes, jcc forced to its long encoding.
13274 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13275   match(If cop cmp);
13276   effect(USE labl);
13277 
13278   ins_cost(300);
13279   format %{ "j$cop,u   $labl" %}
13280   size(6);
13281   ins_encode %{
13282     Label* L = $labl$$label;
13283     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13284   %}
13285   ins_pipe(pipe_jcc);
13286 %}
13287 
13288 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // If branch for the cmpOpUCF / rFlagsRegUCF operand pair.  Emits the
        // same single long jcc as jmpConU but is costed at 200 rather than 300.
13289   match(If cop cmp);
13290   effect(USE labl);
13291 
13292   ins_cost(200);
13293   format %{ "j$cop,u   $labl" %}
13294   size(6);
13295   ins_encode %{
13296     Label* L = $labl$$label;
13297     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
13298   %}
13299   ins_pipe(pipe_jcc);
13300 %}
13301 
13302 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // If branch for the cmpOpUCF2 condition, which the encoding accepts
        // only as notEqual or equal.  Two jumps are emitted so the parity flag
        // is examined before the equality condition.  Unlike the other long
        // branches in this section, no size() attribute is declared here.
        // NOTE(review): parity presumably flags an unordered floating-point
        // compare - confirm against the cmpOpUCF2 operand definition.
13303   match(If cop cmp);
13304   effect(USE labl);
13305 
13306   ins_cost(200);
13307   format %{ $$template
13308     if ($cop$$cmpcode == Assembler::notEqual) {
13309       $$emit$$"jp,u    $labl\n\t"
13310       $$emit$$"j$cop,u   $labl"
13311     } else {
13312       $$emit$$"jp,u    done\n\t"
13313       $$emit$$"j$cop,u   $labl\n\t"
13314       $$emit$$"done:"
13315     }
13316   %}
13317   ins_encode %{
13318     Label* l = $labl$$label;
13319     if ($cop$$cmpcode == Assembler::notEqual) {
            // notEqual: branch to the target on parity as well as on notEqual.
13320       __ jcc(Assembler::parity, *l, false);
13321       __ jcc(Assembler::notEqual, *l, false);
13322     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: parity skips over the branch, so the target is reached
            // only when parity is clear and the equal condition holds.
13323       Label done;
13324       __ jccb(Assembler::parity, done);
13325       __ jcc(Assembler::equal, *l, false);
13326       __ bind(done);
13327     } else {
13328        ShouldNotReachHere();
13329     }
13330   %}
13331   ins_pipe(pipe_jcc);
13332 %}
13333 
13334 // ============================================================================
13335 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
13336 // superklass array for an instance of the superklass.  Set a hidden
13337 // internal cache on a hit (cache is checked with exposed code in
13338 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
13339 // encoding ALSO sets flags.
13340 
13341 instruct partialSubtypeCheck(rdi_RegP result,
13342                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
13343                              rFlagsReg cr)
13344 %{
        // Value-producing form of the secondary-supers scan: the result
        // pointer is defined in the rdi_RegP operand, while rcx and the flags
        // are declared as killed.  The code itself comes from the shared
        // enc_PartialSubtypeCheck encoding (defined elsewhere in this file);
        // opcode(0x1) selects the variant that also clears the result
        // register on a hit, as listed in the format below.
13345   match(Set result (PartialSubtypeCheck sub super));
13346   effect(KILL rcx, KILL cr);
13347 
13348   ins_cost(1100);  // slightly larger than the next version
13349   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
13350             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
13351             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
13352             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
13353             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
13354             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
13355             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
13356     "miss:\t" %}
13357 
13358   opcode(0x1); // Force a XOR of RDI
13359   ins_encode(enc_PartialSubtypeCheck());
13360   ins_pipe(pipe_slow);
13361 %}
13362 
13363 instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
13364                                      rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
13365                                      immP0 zero,
13366                                      rdi_RegP result)
13367 %{
        // Flags-producing form of the secondary-supers scan: matches the
        // PartialSubtypeCheck result compared against the immP0 operand and
        // defines only the flags.  rcx and the rdi_RegP result operand are
        // declared as killed.  The shared enc_PartialSubtypeCheck encoding is
        // used with opcode(0x0), i.e. without the final XOR of the result
        // register, and the rule is costed slightly below partialSubtypeCheck.
13368   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
13369   effect(KILL rcx, KILL result);
13370 
13371   ins_cost(1000);
13372   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
13373             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
13374             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
13375             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
13376             "jne,s   miss\t\t# Missed: flags nz\n\t"
13377             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
13378     "miss:\t" %}
13379 
13380   opcode(0x0); // No need to XOR RDI
13381   ins_encode(enc_PartialSubtypeCheck());
13382   ins_pipe(pipe_slow);
13383 %}
13384 
13385 // ============================================================================
13386 // Branch Instructions -- short offset versions
13387 //
13388 // These instructions are used to replace jumps of a long offset (the default
13389 // match) with jumps of a shorter offset.  These instructions are all tagged
13390 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13391 // match rules in general matching.  Instead, the ADLC generates a conversion
13392 // method in the MachNode which can be used to do in-place replacement of the
13393 // long variant with the shorter variant.  The compiler will determine if a
13394 // branch can be taken by the is_short_branch_offset() predicate in the machine
13395 // specific code section of the file.
13396 
13397 // Jump Direct - Label defines a relative address from JMP+1
      // Short (2-byte) replacement for jmpDir, emitted with jmpb.  It is
      // tagged ins_short_branch(1), so it is substituted for the long form
      // rather than selected by general matching.
13398 instruct jmpDir_short(label labl) %{
13399   match(Goto);
13400   effect(USE labl);
13401 
13402   ins_cost(300);
13403   format %{ "jmp,s   $labl" %}
13404   size(2);
13405   ins_encode %{
13406     Label* L = $labl$$label;
13407     __ jmpb(*L);
13408   %}
13409   ins_pipe(pipe_jmp);
13410   ins_short_branch(1);
13411 %}
13412 
13413 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short (2-byte) replacement for jmpCon, emitted with jccb and tagged
      // ins_short_branch(1).
13414 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
13415   match(If cop cr);
13416   effect(USE labl);
13417 
13418   ins_cost(300);
13419   format %{ "j$cop,s   $labl" %}
13420   size(2);
13421   ins_encode %{
13422     Label* L = $labl$$label;
13423     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13424   %}
13425   ins_pipe(pipe_jcc);
13426   ins_short_branch(1);
13427 %}
13428 
13429 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short (2-byte) replacement for jmpLoopEnd (CountedLoopEnd on the
      // signed flags operand), emitted with jccb and tagged
      // ins_short_branch(1).
13430 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
13431   match(CountedLoopEnd cop cr);
13432   effect(USE labl);
13433 
13434   ins_cost(300);
13435   format %{ "j$cop,s   $labl\t# loop end" %}
13436   size(2);
13437   ins_encode %{
13438     Label* L = $labl$$label;
13439     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13440   %}
13441   ins_pipe(pipe_jcc);
13442   ins_short_branch(1);
13443 %}
13444 
13445 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short (2-byte) replacement for jmpLoopEndU (CountedLoopEnd on the
      // unsigned flags operand), emitted with jccb and tagged
      // ins_short_branch(1).
13446 instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13447   match(CountedLoopEnd cop cmp);
13448   effect(USE labl);
13449 
13450   ins_cost(300);
13451   format %{ "j$cop,us  $labl\t# loop end" %}
13452   size(2);
13453   ins_encode %{
13454     Label* L = $labl$$label;
13455     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13456   %}
13457   ins_pipe(pipe_jcc);
13458   ins_short_branch(1);
13459 %}
13460 
13461 instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short (2-byte) replacement for jmpLoopEndUCF, emitted with jccb and
        // tagged ins_short_branch(1).  Note the cost here is 300 while the
        // long form declares 200.
13462   match(CountedLoopEnd cop cmp);
13463   effect(USE labl);
13464 
13465   ins_cost(300);
13466   format %{ "j$cop,us  $labl\t# loop end" %}
13467   size(2);
13468   ins_encode %{
13469     Label* L = $labl$$label;
13470     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13471   %}
13472   ins_pipe(pipe_jcc);
13473   ins_short_branch(1);
13474 %}
13475 
13476 // Jump Direct Conditional - using unsigned comparison
      // Short (2-byte) replacement for jmpConU, emitted with jccb and tagged
      // ins_short_branch(1).
13477 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
13478   match(If cop cmp);
13479   effect(USE labl);
13480 
13481   ins_cost(300);
13482   format %{ "j$cop,us  $labl" %}
13483   size(2);
13484   ins_encode %{
13485     Label* L = $labl$$label;
13486     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13487   %}
13488   ins_pipe(pipe_jcc);
13489   ins_short_branch(1);
13490 %}
13491 
13492 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Short (2-byte) replacement for jmpConUCF, emitted with jccb and
        // tagged ins_short_branch(1).  Note the cost here is 300 while the
        // long form declares 200.
13493   match(If cop cmp);
13494   effect(USE labl);
13495 
13496   ins_cost(300);
13497   format %{ "j$cop,us  $labl" %}
13498   size(2);
13499   ins_encode %{
13500     Label* L = $labl$$label;
13501     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
13502   %}
13503   ins_pipe(pipe_jcc);
13504   ins_short_branch(1);
13505 %}
13506 
13507 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
        // Short replacement for jmpConUCF2, tagged ins_short_branch(1).  It
        // emits the same two-jump sequence using jccb only, and declares
        // size(4) for the pair of short jumps.
13508   match(If cop cmp);
13509   effect(USE labl);
13510 
13511   ins_cost(300);
13512   format %{ $$template
13513     if ($cop$$cmpcode == Assembler::notEqual) {
13514       $$emit$$"jp,u,s  $labl\n\t"
13515       $$emit$$"j$cop,u,s  $labl"
13516     } else {
13517       $$emit$$"jp,u,s  done\n\t"
13518       $$emit$$"j$cop,u,s  $labl\n\t"
13519       $$emit$$"done:"
13520     }
13521   %}
13522   size(4);
13523   ins_encode %{
13524     Label* l = $labl$$label;
13525     if ($cop$$cmpcode == Assembler::notEqual) {
            // notEqual: branch to the target on parity as well as on notEqual.
13526       __ jccb(Assembler::parity, *l);
13527       __ jccb(Assembler::notEqual, *l);
13528     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: parity skips over the branch, so the target is reached
            // only when parity is clear and the equal condition holds.
13529       Label done;
13530       __ jccb(Assembler::parity, done);
13531       __ jccb(Assembler::equal, *l);
13532       __ bind(done);
13533     } else {
13534        ShouldNotReachHere();
13535     }
13536   %}
13537   ins_pipe(pipe_jcc);
13538   ins_short_branch(1);
13539 %}
13540 
13541 // ============================================================================
13542 // inlined locking and unlocking
13543 
13544 instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
        // FastLock rule selected when the current compilation uses RTM.
        // The outcome is delivered in the flags operand cr.  box is consumed
        // and clobbered (USE_KILL); tmp, scr, cx1 and cx2 are scratch
        // registers.  The RTM counters, the method's method_data and the
        // profile_rtm setting are forwarded to MacroAssembler::fast_lock with
        // the use_rtm argument set to true.
13545   predicate(Compile::current()->use_rtm());
13546   match(Set cr (FastLock object box));
13547   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13548   ins_cost(300);
13549   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13550   ins_encode %{
13551     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13552                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13553                  _rtm_counters, _stack_rtm_counters,
13554                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13555                  true, ra_->C->profile_rtm());
13556   %}
13557   ins_pipe(pipe_slow);
13558 %}
13559 
13560 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr, rRegP cx1) %{
        // FastLock rule selected when the current compilation does not use
        // RTM.  Calls the same MacroAssembler::fast_lock as the RTM rule but
        // passes noreg for the second extra scratch register, NULL for the
        // RTM counters and method data, and false for both RTM flags.
        // NOTE(review): cx1 is declared TEMP, yet the "kills" list in the
        // format string does not mention $cx1.
13561   predicate(!Compile::current()->use_rtm());
13562   match(Set cr (FastLock object box));
13563   effect(TEMP tmp, TEMP scr, TEMP cx1, USE_KILL box);
13564   ins_cost(300);
13565   format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
13566   ins_encode %{
13567     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13568                  $scr$$Register, $cx1$$Register, noreg, NULL, NULL, NULL, false, false);
13569   %}
13570   ins_pipe(pipe_slow);
13571 %}
13572 
13573 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
        // FastUnlock rule; there is a single variant, and whether RTM is in
        // use is passed to MacroAssembler::fast_unlock as an argument.  The
        // outcome is delivered in the flags operand cr; box is consumed and
        // clobbered (USE_KILL) and tmp is a scratch register.
13574   match(Set cr (FastUnlock object box));
13575   effect(TEMP tmp, USE_KILL box);
13576   ins_cost(300);
13577   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
13578   ins_encode %{
13579     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13580   %}
13581   ins_pipe(pipe_slow);
13582 %}
13583 
13584 
13585 // ============================================================================
13586 // Safepoint Instructions
13587 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
13588 %{
        // Safepoint poll: reads the word addressed by the poll register with
        // a testl against rax.  Only the flags are declared as killed.  The
        // instruction is tagged with a poll_type relocation, and the assert
        // checks that what was emitted is recognised as a safepoint poll.
13589   match(SafePoint poll);
13590   effect(KILL cr, USE poll);
13591 
13592   format %{ "testl   rax, [$poll]\t"
13593             "# Safepoint: poll for GC" %}
13594   ins_cost(125);
13595   size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13596   ins_encode %{
13597     __ relocate(relocInfo::poll_type);
          // Remember where the poll starts so the assert below can inspect it.
13598     address pre_pc = __ pc();
13599     __ testl(rax, Address($poll$$Register, 0));
13600     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
13601   %}
13602   ins_pipe(ialu_reg_mem);
13603 %}
13604 
13605 instruct mask_all_evexL(kReg dst, rRegL src) %{
        // MaskAll from a long source register into a kReg mask operand.  The
        // mask length is taken from the vector length of this node and handed
        // to vector_maskall_operation together with both registers.
13606   match(Set dst (MaskAll src));
13607   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
13608   ins_encode %{
13609     int mask_len = Matcher::vector_length(this);
13610     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13611   %}
13612   ins_pipe( pipe_slow );
13613 %}
13614 
13615 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
        // MaskAll from an int source, restricted by the predicate to vector
        // lengths greater than 32.  The int is first sign-extended into the
        // long scratch register tmp, which is then used as the source of
        // vector_maskall_operation.
13616   predicate(Matcher::vector_length(n) > 32);
13617   match(Set dst (MaskAll src));
13618   effect(TEMP tmp);
13619   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
13620   ins_encode %{
13621     int mask_len = Matcher::vector_length(this);
13622     __ movslq($tmp$$Register, $src$$Register);
13623     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
13624   %}
13625   ins_pipe( pipe_slow );
13626 %}
13627 
13628 // ============================================================================
13629 // Procedure Call/Return Instructions
13630 // Call Java Static Instruction
13631 // Note: If this code changes, the corresponding ret_addr_offset() and
13632 //       compute_padding() functions will have to be adjusted.
      //
      // The encoding is a sequence of three named encodings: clear_avx, the
      // static call itself, and call_epilog.  The instruction requests
      // 4-byte alignment via ins_alignment(4).
13633 instruct CallStaticJavaDirect(method meth) %{
13634   match(CallStaticJava);
13635   effect(USE meth);
13636 
13637   ins_cost(300);
13638   format %{ "call,static " %}
13639   opcode(0xE8); /* E8 cd */
13640   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
13641   ins_pipe(pipe_slow);
13642   ins_alignment(4);
13643 %}
13644 
13645 // Call Java Dynamic Instruction
13646 // Note: If this code changes, the corresponding ret_addr_offset() and
13647 //       compute_padding() functions will have to be adjusted.
      //
      // The encoding is clear_avx, the dynamic call, then call_epilog.  Per
      // the format string the call is preceded by a load of
      // Universe::non_oop_word() into rax.  The instruction requests 4-byte
      // alignment via ins_alignment(4).
13648 instruct CallDynamicJavaDirect(method meth)
13649 %{
13650   match(CallDynamicJava);
13651   effect(USE meth);
13652 
13653   ins_cost(300);
13654   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
13655             "call,dynamic " %}
13656   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
13657   ins_pipe(pipe_slow);
13658   ins_alignment(4);
13659 %}
13660 
13661 // Call Runtime Instruction
      // Matches CallRuntime; the encoding is clear_avx followed by the shared
      // Java_To_Runtime call encoding.  No call_epilog and no alignment
      // request are attached, unlike the Java call rules above.
13662 instruct CallRuntimeDirect(method meth)
13663 %{
13664   match(CallRuntime);
13665   effect(USE meth);
13666 
13667   ins_cost(300);
13668   format %{ "call,runtime " %}
13669   ins_encode(clear_avx, Java_To_Runtime(meth));
13670   ins_pipe(pipe_slow);
13671 %}
13672 
13673 // Call runtime without safepoint
      // Matches CallLeaf; uses the same clear_avx + Java_To_Runtime encoding
      // as CallRuntimeDirect and differs only in the matched node and the
      // format string.
13674 instruct CallLeafDirect(method meth)
13675 %{
13676   match(CallLeaf);
13677   effect(USE meth);
13678 
13679   ins_cost(300);
13680   format %{ "call_leaf,runtime " %}
13681   ins_encode(clear_avx, Java_To_Runtime(meth));
13682   ins_pipe(pipe_slow);
13683 %}
13684 
13685 // Call runtime without safepoint and with vector arguments
      // Matches CallLeafVector.  Unlike the other runtime-call rules in this
      // section, the encoding list has no clear_avx step.
      // NOTE(review): presumably so that vector argument registers are left
      // intact for the callee - confirm against the clear_avx encoding.
13686 instruct CallLeafDirectVector(method meth)
13687 %{
13688   match(CallLeafVector);
13689   effect(USE meth);
13690 
13691   ins_cost(300);
13692   format %{ "call_leaf,vector " %}
13693   ins_encode(Java_To_Runtime(meth));
13694   ins_pipe(pipe_slow);
13695 %}
13696 
13697 // Call runtime without safepoint
13698 // entry point is null, target holds the address to call
      // Selected by the predicate when the call node has no entry point; the
      // call is then made indirectly through the target register.  No
      // clear_avx step is emitted by this encoding.
13699 instruct CallLeafNoFPInDirect(rRegP target)
13700 %{
13701   predicate(n->as_Call()->entry_point() == NULL);
13702   match(CallLeafNoFP target);
13703 
13704   ins_cost(300);
13705   format %{ "call_leaf_nofp,runtime indirect " %}
13706   ins_encode %{
13707      __ call($target$$Register);
13708   %}
13709 
13710   ins_pipe(pipe_slow);
13711 %}
13712 
13713 instruct CallLeafNoFPDirect(method meth)
13714 %{
        // Direct counterpart of CallLeafNoFPInDirect: selected when the call
        // node has a non-NULL entry point, and encoded with clear_avx followed
        // by the shared Java_To_Runtime call encoding.
13715   predicate(n->as_Call()->entry_point() != NULL);
13716   match(CallLeafNoFP);
13717   effect(USE meth);
13718 
13719   ins_cost(300);
13720   format %{ "call_leaf_nofp,runtime " %}
13721   ins_encode(clear_avx, Java_To_Runtime(meth));
13722   ins_pipe(pipe_slow);
13723 %}
13724 
13725 // Return Instruction
13726 // Remove the return address & jump to it.
13727 // NOTE(review): an earlier note here said a nop is always emitted after the
13728 // ret to leave room for safepoint patching; the encoding below emits only
      // ret(0), so confirm whether that padding is still produced anywhere.
13729 instruct Ret()
13730 %{
13731   match(Return);
13732 
13733   format %{ "ret" %}
13734   ins_encode %{
13735     __ ret(0);
13736   %}
13737   ins_pipe(pipe_jmp);
13738 %}
13739 
13740 // Tail Call; Jump from runtime stub to Java code.
13741 // Also known as an 'interprocedural jump'.
13742 // Target of jump will eventually return to caller.
13743 // TailJump below removes the return address.
      // The jump target uses the no_rbp_RegP operand class and the method
      // pointer is pinned to the rbx_RegP operand; the encoding is a single
      // indirect jmp through the target register and does not touch
      // method_ptr.
13744 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
13745 %{
13746   match(TailCall jump_target method_ptr);
13747 
13748   ins_cost(300);
13749   format %{ "jmp     $jump_target\t# rbx holds method" %}
13750   ins_encode %{
13751     __ jmp($jump_target$$Register);
13752   %}
13753   ins_pipe(pipe_jmp);
13754 %}
13755 
13756 // Tail Jump; remove the return address; jump to target.
13757 // TailCall above leaves the return address around.
      // The exception oop is pinned to the rax_RegP operand.  The return
      // address is discarded by popping it into rdx, after which an indirect
      // jmp through the target register is emitted.
13758 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
13759 %{
13760   match(TailJump jump_target ex_oop);
13761 
13762   ins_cost(300);
13763   format %{ "popq    rdx\t# pop return address\n\t"
13764             "jmp     $jump_target" %}
13765   ins_encode %{
13766     __ popq(as_Register(RDX_enc));
13767     __ jmp($jump_target$$Register);
13768   %}
13769   ins_pipe(pipe_jmp);
13770 %}
13771 
13772 // Create exception oop: created by stack-crawling runtime code.
13773 // Created exception is now available to this handler, and is setup
13774 // just prior to jumping to this handler.  No code emitted.
      // The rule only binds the CreateEx result to the rax_RegP operand: it
      // declares size(0) and an empty encoding.
13775 instruct CreateException(rax_RegP ex_oop)
13776 %{
13777   match(Set ex_oop (CreateEx));
13778 
13779   size(0);
13780   // use the following format syntax
13781   format %{ "# exception oop is in rax; no code emitted" %}
13782   ins_encode();
13783   ins_pipe(empty);
13784 %}
13785 
13786 // Rethrow exception:
13787 // The exception oop will come in the first argument position.
13788 // Then JUMP (not call) to the rethrow stub code.
      // The rule takes no operands; the jump is produced by the shared
      // enc_rethrow encoding, which is defined elsewhere in this file.
13789 instruct RethrowException()
13790 %{
13791   match(Rethrow);
13792 
13793   // use the following format syntax
13794   format %{ "jmp     rethrow_stub" %}
13795   ins_encode(enc_rethrow);
13796   ins_pipe(pipe_jmp);
13797 %}
13798 
13799 // ============================================================================
13800 // This name is KNOWN by the ADLC and cannot be changed.
13801 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13802 // for this guy.
      // Matches ThreadLocal with the result pinned to the r15_RegP operand.
      // The rule declares size(0) and an empty encoding, so it only tells the
      // register allocator where the value lives.
13803 instruct tlsLoadP(r15_RegP dst) %{
13804   match(Set dst (ThreadLocal));
13805   effect(DEF dst);
13806 
13807   size(0);
13808   format %{ "# TLS is in R15" %}
13809   ins_encode( /*empty encoding*/ );
13810   ins_pipe(ialu_reg_reg);
13811 %}
13812 
13813 
13814 //----------PEEPHOLE RULES-----------------------------------------------------
13815 // These must follow all instruction definitions as they use the names
13816 // defined in the instructions definitions.
13817 //
13818 // peepmatch ( root_instr_name [preceding_instruction]* );
13819 //
13820 // peepconstraint %{
13821 // (instruction_number.operand_name relational_op instruction_number.operand_name
13822 //  [, ...] );
13823 // // instruction numbers are zero-based using left to right order in peepmatch
13824 //
13825 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13826 // // provide an instruction_number.operand_name for each operand that appears
13827 // // in the replacement instruction's match rule
13828 //
13829 // ---------VM FLAGS---------------------------------------------------------
13830 //
13831 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13832 //
13833 // Each peephole rule is given an identifying number starting with zero and
13834 // increasing by one in the order seen by the parser.  An individual peephole
13835 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13836 // on the command-line.
13837 //
13838 // ---------CURRENT LIMITATIONS----------------------------------------------
13839 //
13840 // Only match adjacent instructions in same basic block
13841 // Only equality constraints
13842 // Only constraints between operands, not (0.dest_reg == RAX_enc)
13843 // Only one replacement instruction
13844 //
13845 // ---------EXAMPLE----------------------------------------------------------
13846 //
13847 // // pertinent parts of existing instructions in architecture description
13848 // instruct movI(rRegI dst, rRegI src)
13849 // %{
13850 //   match(Set dst (CopyI src));
13851 // %}
13852 //
13853 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
13854 // %{
13855 //   match(Set dst (AddI dst src));
13856 //   effect(KILL cr);
13857 // %}
13858 //
13859 // // Change (inc mov) to lea
13860 // peephole %{
13861 //   // increment preceded by register-register move
13862 //   peepmatch ( incI_rReg movI );
13863 //   // require that the destination register of the increment
13864 //   // match the destination register of the move
13865 //   peepconstraint ( 0.dst == 1.dst );
13866 //   // construct a replacement instruction that sets
13867 //   // the destination to ( move's source register + one )
13868 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
13869 // %}
13870 //
13871 
13872 // Implementation no longer uses movX instructions since
13873 // machine-independent system no longer uses CopyX nodes.
13874 //
13875 // peephole
13876 // %{
13877 //   peepmatch (incI_rReg movI);
13878 //   peepconstraint (0.dst == 1.dst);
13879 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13880 // %}
13881 
13882 // peephole
13883 // %{
13884 //   peepmatch (decI_rReg movI);
13885 //   peepconstraint (0.dst == 1.dst);
13886 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13887 // %}
13888 
13889 // peephole
13890 // %{
13891 //   peepmatch (addI_rReg_imm movI);
13892 //   peepconstraint (0.dst == 1.dst);
13893 //   peepreplace (leaI_rReg_immI(0.dst 1.src 0.src));
13894 // %}
13895 
13896 // peephole
13897 // %{
13898 //   peepmatch (incL_rReg movL);
13899 //   peepconstraint (0.dst == 1.dst);
13900 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13901 // %}
13902 
13903 // peephole
13904 // %{
13905 //   peepmatch (decL_rReg movL);
13906 //   peepconstraint (0.dst == 1.dst);
13907 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13908 // %}
13909 
13910 // peephole
13911 // %{
13912 //   peepmatch (addL_rReg_imm movL);
13913 //   peepconstraint (0.dst == 1.dst);
13914 //   peepreplace (leaL_rReg_immL(0.dst 1.src 0.src));
13915 // %}
13916 
13917 // peephole
13918 // %{
13919 //   peepmatch (addP_rReg_imm movP);
13920 //   peepconstraint (0.dst == 1.dst);
13921 //   peepreplace (leaP_rReg_imm(0.dst 1.src 0.src));
13922 // %}
13923 
13924 // // Change load of spilled value to only a spill
13925 // instruct storeI(memory mem, rRegI src)
13926 // %{
13927 //   match(Set mem (StoreI mem src));
13928 // %}
13929 //
13930 // instruct loadI(rRegI dst, memory mem)
13931 // %{
13932 //   match(Set dst (LoadI mem));
13933 // %}
13934 //
13935 
13936 peephole
13937 %{
        // Int reload after a spill: instruction 0 is the loadI (the root) and
        // instruction 1 is the storeI that precedes it.  When the load reads
        // back the same memory operand into the same register the store just
        // wrote, the pair is replaced by the storeI alone.
13938   peepmatch (loadI storeI);
13939   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
13940   peepreplace (storeI(1.mem 1.mem 1.src));
13941 %}
13942 
13943 peephole
13944 %{
        // Long counterpart of the loadI/storeI rule: instruction 0 is the
        // loadL (the root) and instruction 1 is the preceding storeL.  When
        // register and memory operand both agree, the pair is replaced by the
        // storeL alone.
13945   peepmatch (loadL storeL);
13946   peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
13947   peepreplace (storeL(1.mem 1.mem 1.src));
13948 %}
13949 
13950 //----------SMARTSPILL RULES---------------------------------------------------
13951 // These must follow all instruction definitions as they use the names
13952 // defined in the instructions definitions.