1 //
    2 // Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
    3 // Copyright (c) 2012, 2022 SAP SE. All rights reserved.
    4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    5 //
    6 // This code is free software; you can redistribute it and/or modify it
    7 // under the terms of the GNU General Public License version 2 only, as
    8 // published by the Free Software Foundation.
    9 //
   10 // This code is distributed in the hope that it will be useful, but WITHOUT
   11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   13 // version 2 for more details (a copy is included in the LICENSE file that
   14 // accompanied this code).
   15 //
   16 // You should have received a copy of the GNU General Public License version
   17 // 2 along with this work; if not, write to the Free Software Foundation,
   18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   19 //
   20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   21 // or visit www.oracle.com if you need additional information or have any
   22 // questions.
   23 //
   24 //
   25 
   26 //
   27 // PPC64 Architecture Description File
   28 //
   29 
   30 //----------REGISTER DEFINITION BLOCK------------------------------------------
   31 // This information is used by the matcher and the register allocator to
   32 // describe individual registers and classes of registers within the target
   33 // architecture.
   34 register %{
   35 //----------Architecture Description Register Definitions----------------------
   36 // General Registers
   37 // "reg_def"  name (register save type, C convention save type,
   38 //                  ideal register type, encoding);
   39 //
   40 // Register Save Types:
   41 //
   42 //   NS  = No-Save:     The register allocator assumes that these registers
   43 //                      can be used without saving upon entry to the method, &
   44 //                      that they do not need to be saved at call sites.
   45 //
   46 //   SOC = Save-On-Call: The register allocator assumes that these registers
   47 //                      can be used without saving upon entry to the method,
   48 //                      but that they must be saved at call sites.
   49 //                      These are called "volatiles" on ppc.
   50 //
   51 //   SOE = Save-On-Entry: The register allocator assumes that these registers
   52 //                      must be saved before using them upon entry to the
   53 //                      method, but they do not need to be saved at call
   54 //                      sites.
   55 //                      These are called "nonvolatiles" on ppc.
   56 //
   57 //   AS  = Always-Save:   The register allocator assumes that these registers
   58 //                      must be saved before using them upon entry to the
   59 //                      method, & that they must be saved at call sites.
   60 //
   61 // Ideal Register Type is used to determine how to save & restore a
   62 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   63 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
   64 //
   65 // The encoding number is the actual bit-pattern placed into the opcodes.
   66 //
   67 // PPC64 register definitions, based on the 64-bit PowerPC ELF ABI
   68 // Supplement Version 1.7 as of 2003-10-29.
   69 //
   70 // For each 64-bit register we must define two registers: the register
    71 // itself, e.g. R3, and a corresponding virtual 32-bit 'half',
   72 // e.g. R3_H, which is needed by the allocator, but is not used
   73 // for stores, loads, etc.
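// As a reading aid (the authoritative definitions follow below), the entry
//   reg_def R3   ( SOC, SOC, Op_RegI,  3, R3->as_VMReg() );
// declares R3 as save-on-call for both the allocator and the C calling
// convention, with ideal type RegI and hardware encoding 3, while
//   reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );
// only exists so the allocator can track the second 32-bit half; its
// encoding (99) is a dummy value that never reaches an opcode.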
   74 
   75 // ----------------------------
   76 // Integer/Long Registers
   77 // ----------------------------
   78 
   79   // PPC64 has 32 64-bit integer registers.
   80 
   81   // types: v = volatile, nv = non-volatile, s = system
   82   reg_def R0   ( SOC, SOC, Op_RegI,  0, R0->as_VMReg()         );  // v   used in prologs
   83   reg_def R0_H ( SOC, SOC, Op_RegI, 99, R0->as_VMReg()->next() );
   84   reg_def R1   ( NS,  NS,  Op_RegI,  1, R1->as_VMReg()         );  // s   SP
   85   reg_def R1_H ( NS,  NS,  Op_RegI, 99, R1->as_VMReg()->next() );
   86   reg_def R2   ( SOC, SOC, Op_RegI,  2, R2->as_VMReg()         );  // v   TOC
   87   reg_def R2_H ( SOC, SOC, Op_RegI, 99, R2->as_VMReg()->next() );
   88   reg_def R3   ( SOC, SOC, Op_RegI,  3, R3->as_VMReg()         );  // v   iarg1 & iret
   89   reg_def R3_H ( SOC, SOC, Op_RegI, 99, R3->as_VMReg()->next() );
    90   reg_def R4   ( SOC, SOC, Op_RegI,  4, R4->as_VMReg()         );  // v   iarg2
   91   reg_def R4_H ( SOC, SOC, Op_RegI, 99, R4->as_VMReg()->next() );
   92   reg_def R5   ( SOC, SOC, Op_RegI,  5, R5->as_VMReg()         );  // v   iarg3
   93   reg_def R5_H ( SOC, SOC, Op_RegI, 99, R5->as_VMReg()->next() );
   94   reg_def R6   ( SOC, SOC, Op_RegI,  6, R6->as_VMReg()         );  // v   iarg4
   95   reg_def R6_H ( SOC, SOC, Op_RegI, 99, R6->as_VMReg()->next() );
   96   reg_def R7   ( SOC, SOC, Op_RegI,  7, R7->as_VMReg()         );  // v   iarg5
   97   reg_def R7_H ( SOC, SOC, Op_RegI, 99, R7->as_VMReg()->next() );
   98   reg_def R8   ( SOC, SOC, Op_RegI,  8, R8->as_VMReg()         );  // v   iarg6
   99   reg_def R8_H ( SOC, SOC, Op_RegI, 99, R8->as_VMReg()->next() );
  100   reg_def R9   ( SOC, SOC, Op_RegI,  9, R9->as_VMReg()         );  // v   iarg7
  101   reg_def R9_H ( SOC, SOC, Op_RegI, 99, R9->as_VMReg()->next() );
  102   reg_def R10  ( SOC, SOC, Op_RegI, 10, R10->as_VMReg()        );  // v   iarg8
  103   reg_def R10_H( SOC, SOC, Op_RegI, 99, R10->as_VMReg()->next());
  104   reg_def R11  ( SOC, SOC, Op_RegI, 11, R11->as_VMReg()        );  // v   ENV / scratch
  105   reg_def R11_H( SOC, SOC, Op_RegI, 99, R11->as_VMReg()->next());
  106   reg_def R12  ( SOC, SOC, Op_RegI, 12, R12->as_VMReg()        );  // v   scratch
  107   reg_def R12_H( SOC, SOC, Op_RegI, 99, R12->as_VMReg()->next());
  108   reg_def R13  ( NS,  NS,  Op_RegI, 13, R13->as_VMReg()        );  // s   system thread id
  109   reg_def R13_H( NS,  NS,  Op_RegI, 99, R13->as_VMReg()->next());
  110   reg_def R14  ( SOC, SOE, Op_RegI, 14, R14->as_VMReg()        );  // nv
  111   reg_def R14_H( SOC, SOE, Op_RegI, 99, R14->as_VMReg()->next());
  112   reg_def R15  ( SOC, SOE, Op_RegI, 15, R15->as_VMReg()        );  // nv
  113   reg_def R15_H( SOC, SOE, Op_RegI, 99, R15->as_VMReg()->next());
  114   reg_def R16  ( SOC, SOE, Op_RegI, 16, R16->as_VMReg()        );  // nv
  115   reg_def R16_H( SOC, SOE, Op_RegI, 99, R16->as_VMReg()->next());
  116   reg_def R17  ( SOC, SOE, Op_RegI, 17, R17->as_VMReg()        );  // nv
  117   reg_def R17_H( SOC, SOE, Op_RegI, 99, R17->as_VMReg()->next());
  118   reg_def R18  ( SOC, SOE, Op_RegI, 18, R18->as_VMReg()        );  // nv
  119   reg_def R18_H( SOC, SOE, Op_RegI, 99, R18->as_VMReg()->next());
  120   reg_def R19  ( SOC, SOE, Op_RegI, 19, R19->as_VMReg()        );  // nv
  121   reg_def R19_H( SOC, SOE, Op_RegI, 99, R19->as_VMReg()->next());
  122   reg_def R20  ( SOC, SOE, Op_RegI, 20, R20->as_VMReg()        );  // nv
  123   reg_def R20_H( SOC, SOE, Op_RegI, 99, R20->as_VMReg()->next());
  124   reg_def R21  ( SOC, SOE, Op_RegI, 21, R21->as_VMReg()        );  // nv
  125   reg_def R21_H( SOC, SOE, Op_RegI, 99, R21->as_VMReg()->next());
  126   reg_def R22  ( SOC, SOE, Op_RegI, 22, R22->as_VMReg()        );  // nv
  127   reg_def R22_H( SOC, SOE, Op_RegI, 99, R22->as_VMReg()->next());
  128   reg_def R23  ( SOC, SOE, Op_RegI, 23, R23->as_VMReg()        );  // nv
  129   reg_def R23_H( SOC, SOE, Op_RegI, 99, R23->as_VMReg()->next());
  130   reg_def R24  ( SOC, SOE, Op_RegI, 24, R24->as_VMReg()        );  // nv
  131   reg_def R24_H( SOC, SOE, Op_RegI, 99, R24->as_VMReg()->next());
  132   reg_def R25  ( SOC, SOE, Op_RegI, 25, R25->as_VMReg()        );  // nv
  133   reg_def R25_H( SOC, SOE, Op_RegI, 99, R25->as_VMReg()->next());
  134   reg_def R26  ( SOC, SOE, Op_RegI, 26, R26->as_VMReg()        );  // nv
  135   reg_def R26_H( SOC, SOE, Op_RegI, 99, R26->as_VMReg()->next());
  136   reg_def R27  ( SOC, SOE, Op_RegI, 27, R27->as_VMReg()        );  // nv
  137   reg_def R27_H( SOC, SOE, Op_RegI, 99, R27->as_VMReg()->next());
  138   reg_def R28  ( SOC, SOE, Op_RegI, 28, R28->as_VMReg()        );  // nv
  139   reg_def R28_H( SOC, SOE, Op_RegI, 99, R28->as_VMReg()->next());
  140   reg_def R29  ( SOC, SOE, Op_RegI, 29, R29->as_VMReg()        );  // nv
  141   reg_def R29_H( SOC, SOE, Op_RegI, 99, R29->as_VMReg()->next());
  142   reg_def R30  ( SOC, SOE, Op_RegI, 30, R30->as_VMReg()        );  // nv
  143   reg_def R30_H( SOC, SOE, Op_RegI, 99, R30->as_VMReg()->next());
  144   reg_def R31  ( SOC, SOE, Op_RegI, 31, R31->as_VMReg()        );  // nv
  145   reg_def R31_H( SOC, SOE, Op_RegI, 99, R31->as_VMReg()->next());
  146 
  147 
  148 // ----------------------------
  149 // Float/Double Registers
  150 // ----------------------------
  151 
  152   // Double Registers
  153   // The rules of ADL require that double registers be defined in pairs.
  154   // Each pair must be two 32-bit values, but not necessarily a pair of
  155   // single float registers. In each pair, ADLC-assigned register numbers
  156   // must be adjacent, with the lower number even. Finally, when the
  157   // CPU stores such a register pair to memory, the word associated with
  158   // the lower ADLC-assigned number must be stored to the lower address.
  159 
  160   // PPC64 has 32 64-bit floating-point registers. Each can store a single
  161   // or double precision floating-point value.
  162 
  163   // types: v = volatile, nv = non-volatile, s = system
  164   reg_def F0   ( SOC, SOC, Op_RegF,  0, F0->as_VMReg()         );  // v   scratch
  165   reg_def F0_H ( SOC, SOC, Op_RegF, 99, F0->as_VMReg()->next() );
  166   reg_def F1   ( SOC, SOC, Op_RegF,  1, F1->as_VMReg()         );  // v   farg1 & fret
  167   reg_def F1_H ( SOC, SOC, Op_RegF, 99, F1->as_VMReg()->next() );
  168   reg_def F2   ( SOC, SOC, Op_RegF,  2, F2->as_VMReg()         );  // v   farg2
  169   reg_def F2_H ( SOC, SOC, Op_RegF, 99, F2->as_VMReg()->next() );
  170   reg_def F3   ( SOC, SOC, Op_RegF,  3, F3->as_VMReg()         );  // v   farg3
  171   reg_def F3_H ( SOC, SOC, Op_RegF, 99, F3->as_VMReg()->next() );
  172   reg_def F4   ( SOC, SOC, Op_RegF,  4, F4->as_VMReg()         );  // v   farg4
  173   reg_def F4_H ( SOC, SOC, Op_RegF, 99, F4->as_VMReg()->next() );
  174   reg_def F5   ( SOC, SOC, Op_RegF,  5, F5->as_VMReg()         );  // v   farg5
  175   reg_def F5_H ( SOC, SOC, Op_RegF, 99, F5->as_VMReg()->next() );
  176   reg_def F6   ( SOC, SOC, Op_RegF,  6, F6->as_VMReg()         );  // v   farg6
  177   reg_def F6_H ( SOC, SOC, Op_RegF, 99, F6->as_VMReg()->next() );
  178   reg_def F7   ( SOC, SOC, Op_RegF,  7, F7->as_VMReg()         );  // v   farg7
  179   reg_def F7_H ( SOC, SOC, Op_RegF, 99, F7->as_VMReg()->next() );
  180   reg_def F8   ( SOC, SOC, Op_RegF,  8, F8->as_VMReg()         );  // v   farg8
  181   reg_def F8_H ( SOC, SOC, Op_RegF, 99, F8->as_VMReg()->next() );
  182   reg_def F9   ( SOC, SOC, Op_RegF,  9, F9->as_VMReg()         );  // v   farg9
  183   reg_def F9_H ( SOC, SOC, Op_RegF, 99, F9->as_VMReg()->next() );
  184   reg_def F10  ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()        );  // v   farg10
  185   reg_def F10_H( SOC, SOC, Op_RegF, 99, F10->as_VMReg()->next());
  186   reg_def F11  ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()        );  // v   farg11
  187   reg_def F11_H( SOC, SOC, Op_RegF, 99, F11->as_VMReg()->next());
  188   reg_def F12  ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()        );  // v   farg12
  189   reg_def F12_H( SOC, SOC, Op_RegF, 99, F12->as_VMReg()->next());
  190   reg_def F13  ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()        );  // v   farg13
  191   reg_def F13_H( SOC, SOC, Op_RegF, 99, F13->as_VMReg()->next());
  192   reg_def F14  ( SOC, SOE, Op_RegF, 14, F14->as_VMReg()        );  // nv
  193   reg_def F14_H( SOC, SOE, Op_RegF, 99, F14->as_VMReg()->next());
  194   reg_def F15  ( SOC, SOE, Op_RegF, 15, F15->as_VMReg()        );  // nv
  195   reg_def F15_H( SOC, SOE, Op_RegF, 99, F15->as_VMReg()->next());
  196   reg_def F16  ( SOC, SOE, Op_RegF, 16, F16->as_VMReg()        );  // nv
  197   reg_def F16_H( SOC, SOE, Op_RegF, 99, F16->as_VMReg()->next());
  198   reg_def F17  ( SOC, SOE, Op_RegF, 17, F17->as_VMReg()        );  // nv
  199   reg_def F17_H( SOC, SOE, Op_RegF, 99, F17->as_VMReg()->next());
  200   reg_def F18  ( SOC, SOE, Op_RegF, 18, F18->as_VMReg()        );  // nv
  201   reg_def F18_H( SOC, SOE, Op_RegF, 99, F18->as_VMReg()->next());
  202   reg_def F19  ( SOC, SOE, Op_RegF, 19, F19->as_VMReg()        );  // nv
  203   reg_def F19_H( SOC, SOE, Op_RegF, 99, F19->as_VMReg()->next());
  204   reg_def F20  ( SOC, SOE, Op_RegF, 20, F20->as_VMReg()        );  // nv
  205   reg_def F20_H( SOC, SOE, Op_RegF, 99, F20->as_VMReg()->next());
  206   reg_def F21  ( SOC, SOE, Op_RegF, 21, F21->as_VMReg()        );  // nv
  207   reg_def F21_H( SOC, SOE, Op_RegF, 99, F21->as_VMReg()->next());
  208   reg_def F22  ( SOC, SOE, Op_RegF, 22, F22->as_VMReg()        );  // nv
  209   reg_def F22_H( SOC, SOE, Op_RegF, 99, F22->as_VMReg()->next());
  210   reg_def F23  ( SOC, SOE, Op_RegF, 23, F23->as_VMReg()        );  // nv
  211   reg_def F23_H( SOC, SOE, Op_RegF, 99, F23->as_VMReg()->next());
  212   reg_def F24  ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()        );  // nv
  213   reg_def F24_H( SOC, SOE, Op_RegF, 99, F24->as_VMReg()->next());
  214   reg_def F25  ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()        );  // nv
  215   reg_def F25_H( SOC, SOE, Op_RegF, 99, F25->as_VMReg()->next());
  216   reg_def F26  ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()        );  // nv
  217   reg_def F26_H( SOC, SOE, Op_RegF, 99, F26->as_VMReg()->next());
  218   reg_def F27  ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()        );  // nv
  219   reg_def F27_H( SOC, SOE, Op_RegF, 99, F27->as_VMReg()->next());
  220   reg_def F28  ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()        );  // nv
  221   reg_def F28_H( SOC, SOE, Op_RegF, 99, F28->as_VMReg()->next());
  222   reg_def F29  ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()        );  // nv
  223   reg_def F29_H( SOC, SOE, Op_RegF, 99, F29->as_VMReg()->next());
  224   reg_def F30  ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()        );  // nv
  225   reg_def F30_H( SOC, SOE, Op_RegF, 99, F30->as_VMReg()->next());
  226   reg_def F31  ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()        );  // nv
  227   reg_def F31_H( SOC, SOE, Op_RegF, 99, F31->as_VMReg()->next());
  228 
  229 // ----------------------------
  230 // Special Registers
  231 // ----------------------------
  232 
  233 // Condition Codes Flag Registers
  234 
  235   // PPC64 has 8 condition code "registers" which are all contained
  236   // in the CR register.
  237 
  238   // types: v = volatile, nv = non-volatile, s = system
  239   reg_def CCR0(SOC, SOC, Op_RegFlags, 0, CCR0->as_VMReg());  // v
  240   reg_def CCR1(SOC, SOC, Op_RegFlags, 1, CCR1->as_VMReg());  // v
  241   reg_def CCR2(SOC, SOC, Op_RegFlags, 2, CCR2->as_VMReg());  // nv
  242   reg_def CCR3(SOC, SOC, Op_RegFlags, 3, CCR3->as_VMReg());  // nv
  243   reg_def CCR4(SOC, SOC, Op_RegFlags, 4, CCR4->as_VMReg());  // nv
  244   reg_def CCR5(SOC, SOC, Op_RegFlags, 5, CCR5->as_VMReg());  // v
  245   reg_def CCR6(SOC, SOC, Op_RegFlags, 6, CCR6->as_VMReg());  // v
  246   reg_def CCR7(SOC, SOC, Op_RegFlags, 7, CCR7->as_VMReg());  // v
  247 
  248   // Special registers of PPC64
  249 
  250   reg_def SR_XER(    SOC, SOC, Op_RegP, 0, SR_XER->as_VMReg());     // v
  251   reg_def SR_LR(     SOC, SOC, Op_RegP, 1, SR_LR->as_VMReg());      // v
  252   reg_def SR_CTR(    SOC, SOC, Op_RegP, 2, SR_CTR->as_VMReg());     // v
  253   reg_def SR_VRSAVE( SOC, SOC, Op_RegP, 3, SR_VRSAVE->as_VMReg());  // v
  254   reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
  255   reg_def SR_PPR(    SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg());     // v
  256 
  257 // ----------------------------
  258 // Vector-Scalar Registers
  259 // ----------------------------
  260   // 1st 32 VSRs are aliases for the FPRs which are already defined above.
  261   reg_def VSR0 ( SOC, SOC, Op_VecX, 0, VMRegImpl::Bad());
  262   reg_def VSR1 ( SOC, SOC, Op_VecX, 1, VMRegImpl::Bad());
  263   reg_def VSR2 ( SOC, SOC, Op_VecX, 2, VMRegImpl::Bad());
  264   reg_def VSR3 ( SOC, SOC, Op_VecX, 3, VMRegImpl::Bad());
  265   reg_def VSR4 ( SOC, SOC, Op_VecX, 4, VMRegImpl::Bad());
  266   reg_def VSR5 ( SOC, SOC, Op_VecX, 5, VMRegImpl::Bad());
  267   reg_def VSR6 ( SOC, SOC, Op_VecX, 6, VMRegImpl::Bad());
  268   reg_def VSR7 ( SOC, SOC, Op_VecX, 7, VMRegImpl::Bad());
  269   reg_def VSR8 ( SOC, SOC, Op_VecX, 8, VMRegImpl::Bad());
  270   reg_def VSR9 ( SOC, SOC, Op_VecX, 9, VMRegImpl::Bad());
  271   reg_def VSR10 ( SOC, SOC, Op_VecX, 10, VMRegImpl::Bad());
  272   reg_def VSR11 ( SOC, SOC, Op_VecX, 11, VMRegImpl::Bad());
  273   reg_def VSR12 ( SOC, SOC, Op_VecX, 12, VMRegImpl::Bad());
  274   reg_def VSR13 ( SOC, SOC, Op_VecX, 13, VMRegImpl::Bad());
  275   reg_def VSR14 ( SOC, SOE, Op_VecX, 14, VMRegImpl::Bad());
  276   reg_def VSR15 ( SOC, SOE, Op_VecX, 15, VMRegImpl::Bad());
  277   reg_def VSR16 ( SOC, SOE, Op_VecX, 16, VMRegImpl::Bad());
  278   reg_def VSR17 ( SOC, SOE, Op_VecX, 17, VMRegImpl::Bad());
  279   reg_def VSR18 ( SOC, SOE, Op_VecX, 18, VMRegImpl::Bad());
  280   reg_def VSR19 ( SOC, SOE, Op_VecX, 19, VMRegImpl::Bad());
  281   reg_def VSR20 ( SOC, SOE, Op_VecX, 20, VMRegImpl::Bad());
  282   reg_def VSR21 ( SOC, SOE, Op_VecX, 21, VMRegImpl::Bad());
  283   reg_def VSR22 ( SOC, SOE, Op_VecX, 22, VMRegImpl::Bad());
  284   reg_def VSR23 ( SOC, SOE, Op_VecX, 23, VMRegImpl::Bad());
  285   reg_def VSR24 ( SOC, SOE, Op_VecX, 24, VMRegImpl::Bad());
  286   reg_def VSR25 ( SOC, SOE, Op_VecX, 25, VMRegImpl::Bad());
  287   reg_def VSR26 ( SOC, SOE, Op_VecX, 26, VMRegImpl::Bad());
  288   reg_def VSR27 ( SOC, SOE, Op_VecX, 27, VMRegImpl::Bad());
  289   reg_def VSR28 ( SOC, SOE, Op_VecX, 28, VMRegImpl::Bad());
  290   reg_def VSR29 ( SOC, SOE, Op_VecX, 29, VMRegImpl::Bad());
  291   reg_def VSR30 ( SOC, SOE, Op_VecX, 30, VMRegImpl::Bad());
  292   reg_def VSR31 ( SOC, SOE, Op_VecX, 31, VMRegImpl::Bad());
  293   // 2nd 32 VSRs are aliases for the VRs which are only defined here.
  294   reg_def VSR32 ( SOC, SOC, Op_VecX, 32, VSR32->as_VMReg());
  295   reg_def VSR33 ( SOC, SOC, Op_VecX, 33, VSR33->as_VMReg());
  296   reg_def VSR34 ( SOC, SOC, Op_VecX, 34, VSR34->as_VMReg());
  297   reg_def VSR35 ( SOC, SOC, Op_VecX, 35, VSR35->as_VMReg());
  298   reg_def VSR36 ( SOC, SOC, Op_VecX, 36, VSR36->as_VMReg());
  299   reg_def VSR37 ( SOC, SOC, Op_VecX, 37, VSR37->as_VMReg());
  300   reg_def VSR38 ( SOC, SOC, Op_VecX, 38, VSR38->as_VMReg());
  301   reg_def VSR39 ( SOC, SOC, Op_VecX, 39, VSR39->as_VMReg());
  302   reg_def VSR40 ( SOC, SOC, Op_VecX, 40, VSR40->as_VMReg());
  303   reg_def VSR41 ( SOC, SOC, Op_VecX, 41, VSR41->as_VMReg());
  304   reg_def VSR42 ( SOC, SOC, Op_VecX, 42, VSR42->as_VMReg());
  305   reg_def VSR43 ( SOC, SOC, Op_VecX, 43, VSR43->as_VMReg());
  306   reg_def VSR44 ( SOC, SOC, Op_VecX, 44, VSR44->as_VMReg());
  307   reg_def VSR45 ( SOC, SOC, Op_VecX, 45, VSR45->as_VMReg());
  308   reg_def VSR46 ( SOC, SOC, Op_VecX, 46, VSR46->as_VMReg());
  309   reg_def VSR47 ( SOC, SOC, Op_VecX, 47, VSR47->as_VMReg());
  310   reg_def VSR48 ( SOC, SOC, Op_VecX, 48, VSR48->as_VMReg());
  311   reg_def VSR49 ( SOC, SOC, Op_VecX, 49, VSR49->as_VMReg());
  312   reg_def VSR50 ( SOC, SOC, Op_VecX, 50, VSR50->as_VMReg());
  313   reg_def VSR51 ( SOC, SOC, Op_VecX, 51, VSR51->as_VMReg());
  314   reg_def VSR52 ( SOC, SOE, Op_VecX, 52, VSR52->as_VMReg());
  315   reg_def VSR53 ( SOC, SOE, Op_VecX, 53, VSR53->as_VMReg());
  316   reg_def VSR54 ( SOC, SOE, Op_VecX, 54, VSR54->as_VMReg());
  317   reg_def VSR55 ( SOC, SOE, Op_VecX, 55, VSR55->as_VMReg());
  318   reg_def VSR56 ( SOC, SOE, Op_VecX, 56, VSR56->as_VMReg());
  319   reg_def VSR57 ( SOC, SOE, Op_VecX, 57, VSR57->as_VMReg());
  320   reg_def VSR58 ( SOC, SOE, Op_VecX, 58, VSR58->as_VMReg());
  321   reg_def VSR59 ( SOC, SOE, Op_VecX, 59, VSR59->as_VMReg());
  322   reg_def VSR60 ( SOC, SOE, Op_VecX, 60, VSR60->as_VMReg());
  323   reg_def VSR61 ( SOC, SOE, Op_VecX, 61, VSR61->as_VMReg());
  324   reg_def VSR62 ( SOC, SOE, Op_VecX, 62, VSR62->as_VMReg());
  325   reg_def VSR63 ( SOC, SOE, Op_VecX, 63, VSR63->as_VMReg());
  326 
  327 // ----------------------------
  328 // Specify priority of register selection within phases of register
  329 // allocation. Highest priority is first. A useful heuristic is to
  330 // give registers a low priority when they are required by machine
  331 // instructions, like EAX and EDX on I486, and choose no-save registers
  332 // before save-on-call, & save-on-call before save-on-entry. Registers
  333 // which participate in fixed calling sequences should come last.
  334 // Registers which are used as pairs must fall on an even boundary.
  335 
  336 // It's worth about 1% on SPEC geomean to get this right.
  337 
  338 // Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration
  339 // in adGlobals_ppc.hpp which defines the <register>_num values, e.g.
  340 // R3_num. Therefore, R3_num may not be (and in reality is not)
  341 // the same as R3->encoding()! Furthermore, we cannot make any
  342 // assumptions on ordering, e.g. R3_num may be less than R2_num.
  343 // Additionally, the function
   344 //   static enum RC rc_class(OptoReg::Name reg)
  345 // maps a given <register>_num value to its chunk type (except for flags)
  346 // and its current implementation relies on chunk0 and chunk1 having a
  347 // size of 64 each.
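// For illustration only (the actual implementation lives elsewhere in this
// file), rc_class() essentially walks the chunk layout defined below, along
// the lines of
//   if (reg < 64)        return rc_int;    // chunk0: 64 integer register halves
//   else if (reg < 128)  return rc_float;  // chunk1: 64 float register halves
//   else ...                               // vector-scalar / flags / stack
// which is why chunk0 and chunk1 must keep a size of 64 entries each.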
  348 
  349 // If you change this allocation class, please have a look at the
  350 // default values for the parameters RoundRobinIntegerRegIntervalStart
  351 // and RoundRobinFloatRegIntervalStart
  352 
  353 alloc_class chunk0 (
   354   // Chunk0 contains *all* 64 integer register halves.
  355 
  356   // "non-volatile" registers
  357   R14, R14_H,
  358   R15, R15_H,
  359   R17, R17_H,
  360   R18, R18_H,
  361   R19, R19_H,
  362   R20, R20_H,
  363   R21, R21_H,
  364   R22, R22_H,
  365   R23, R23_H,
  366   R24, R24_H,
  367   R25, R25_H,
  368   R26, R26_H,
  369   R27, R27_H,
  370   R28, R28_H,
  371   R29, R29_H,
  372   R30, R30_H,
  373   R31, R31_H,
  374 
  375   // scratch/special registers
  376   R11, R11_H,
  377   R12, R12_H,
  378 
  379   // argument registers
  380   R10, R10_H,
  381   R9,  R9_H,
  382   R8,  R8_H,
  383   R7,  R7_H,
  384   R6,  R6_H,
  385   R5,  R5_H,
  386   R4,  R4_H,
  387   R3,  R3_H,
  388 
  389   // special registers, not available for allocation
  390   R16, R16_H,     // R16_thread
  391   R13, R13_H,     // system thread id
  392   R2,  R2_H,      // may be used for TOC
  393   R1,  R1_H,      // SP
  394   R0,  R0_H       // R0 (scratch)
  395 );
  396 
  397 // If you change this allocation class, please have a look at the
  398 // default values for the parameters RoundRobinIntegerRegIntervalStart
  399 // and RoundRobinFloatRegIntervalStart
  400 
  401 alloc_class chunk1 (
   402   // Chunk1 contains *all* 64 floating-point register halves.
  403 
  404   // scratch register
  405   F0,  F0_H,
  406 
  407   // argument registers
  408   F13, F13_H,
  409   F12, F12_H,
  410   F11, F11_H,
  411   F10, F10_H,
  412   F9,  F9_H,
  413   F8,  F8_H,
  414   F7,  F7_H,
  415   F6,  F6_H,
  416   F5,  F5_H,
  417   F4,  F4_H,
  418   F3,  F3_H,
  419   F2,  F2_H,
  420   F1,  F1_H,
  421 
  422   // non-volatile registers
  423   F14, F14_H,
  424   F15, F15_H,
  425   F16, F16_H,
  426   F17, F17_H,
  427   F18, F18_H,
  428   F19, F19_H,
  429   F20, F20_H,
  430   F21, F21_H,
  431   F22, F22_H,
  432   F23, F23_H,
  433   F24, F24_H,
  434   F25, F25_H,
  435   F26, F26_H,
  436   F27, F27_H,
  437   F28, F28_H,
  438   F29, F29_H,
  439   F30, F30_H,
  440   F31, F31_H
  441 );
  442 
  443 alloc_class chunk2 (
  444   // Chunk2 contains *all* 8 condition code registers.
  445 
  446   CCR0,
  447   CCR1,
  448   CCR2,
  449   CCR3,
  450   CCR4,
  451   CCR5,
  452   CCR6,
  453   CCR7
  454 );
  455 
  456 alloc_class chunk3 (
  457   VSR0,
  458   VSR1,
  459   VSR2,
  460   VSR3,
  461   VSR4,
  462   VSR5,
  463   VSR6,
  464   VSR7,
  465   VSR8,
  466   VSR9,
  467   VSR10,
  468   VSR11,
  469   VSR12,
  470   VSR13,
  471   VSR14,
  472   VSR15,
  473   VSR16,
  474   VSR17,
  475   VSR18,
  476   VSR19,
  477   VSR20,
  478   VSR21,
  479   VSR22,
  480   VSR23,
  481   VSR24,
  482   VSR25,
  483   VSR26,
  484   VSR27,
  485   VSR28,
  486   VSR29,
  487   VSR30,
  488   VSR31,
  489   VSR32,
  490   VSR33,
  491   VSR34,
  492   VSR35,
  493   VSR36,
  494   VSR37,
  495   VSR38,
  496   VSR39,
  497   VSR40,
  498   VSR41,
  499   VSR42,
  500   VSR43,
  501   VSR44,
  502   VSR45,
  503   VSR46,
  504   VSR47,
  505   VSR48,
  506   VSR49,
  507   VSR50,
  508   VSR51,
  509   VSR52,
  510   VSR53,
  511   VSR54,
  512   VSR55,
  513   VSR56,
  514   VSR57,
  515   VSR58,
  516   VSR59,
  517   VSR60,
  518   VSR61,
  519   VSR62,
  520   VSR63
  521 );
  522 
  523 alloc_class chunk4 (
  524   // special registers
  525   // These registers are not allocated, but used for nodes generated by postalloc expand.
  526   SR_XER,
  527   SR_LR,
  528   SR_CTR,
  529   SR_VRSAVE,
  530   SR_SPEFSCR,
  531   SR_PPR
  532 );
  533 
  534 //-------Architecture Description Register Classes-----------------------
  535 
  536 // Several register classes are automatically defined based upon
  537 // information in this architecture description.
  538 
  539 // 1) reg_class inline_cache_reg           ( as defined in frame section )
  540 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  541 //
  542 
  543 // ----------------------------
  544 // 32 Bit Register Classes
  545 // ----------------------------
  546 
  547 // We specify registers twice, once as read/write, and once read-only.
  548 // We use the read-only registers for source operands. With this, we
   549 // can include preset read-only registers in this class, as a hard-coded
  550 // '0'-register. (We used to simulate this on ppc.)
  551 
   552 // 32 bit registers that can be read and written, i.e. these registers
  553 // can be dest (or src) of normal instructions.
  554 reg_class bits32_reg_rw(
  555 /*R0*/              // R0
  556 /*R1*/              // SP
  557   R2,               // TOC
  558   R3,
  559   R4,
  560   R5,
  561   R6,
  562   R7,
  563   R8,
  564   R9,
  565   R10,
  566   R11,
  567   R12,
  568 /*R13*/             // system thread id
  569   R14,
  570   R15,
  571 /*R16*/             // R16_thread
  572   R17,
  573   R18,
  574   R19,
  575   R20,
  576   R21,
  577   R22,
  578   R23,
  579   R24,
  580   R25,
  581   R26,
  582   R27,
  583   R28,
  584 /*R29,*/             // global TOC
  585   R30,
  586   R31
  587 );
  588 
   589 // 32 bit registers that can only be read, i.e. these registers can
   590 // only be used as the src of instructions.
  591 reg_class bits32_reg_ro(
  592 /*R0*/              // R0
  593 /*R1*/              // SP
   594   R2,               // TOC
  595   R3,
  596   R4,
  597   R5,
  598   R6,
  599   R7,
  600   R8,
  601   R9,
  602   R10,
  603   R11,
  604   R12,
  605 /*R13*/             // system thread id
  606   R14,
  607   R15,
  608 /*R16*/             // R16_thread
  609   R17,
  610   R18,
  611   R19,
  612   R20,
  613   R21,
  614   R22,
  615   R23,
  616   R24,
  617   R25,
  618   R26,
  619   R27,
  620   R28,
  621 /*R29,*/
  622   R30,
  623   R31
  624 );
  625 
  626 reg_class rscratch1_bits32_reg(R11);
  627 reg_class rscratch2_bits32_reg(R12);
  628 reg_class rarg1_bits32_reg(R3);
  629 reg_class rarg2_bits32_reg(R4);
  630 reg_class rarg3_bits32_reg(R5);
  631 reg_class rarg4_bits32_reg(R6);
  632 
  633 // ----------------------------
  634 // 64 Bit Register Classes
  635 // ----------------------------
   636 // A 64-bit build means 64-bit pointers, which are handled as hi/lo register pairs.
  637 
  638 reg_class rscratch1_bits64_reg(R11_H, R11);
  639 reg_class rscratch2_bits64_reg(R12_H, R12);
  640 reg_class rarg1_bits64_reg(R3_H, R3);
  641 reg_class rarg2_bits64_reg(R4_H, R4);
  642 reg_class rarg3_bits64_reg(R5_H, R5);
  643 reg_class rarg4_bits64_reg(R6_H, R6);
  644 // Thread register, 'written' by tlsLoadP, see there.
  645 reg_class thread_bits64_reg(R16_H, R16);
  646 
  647 reg_class r19_bits64_reg(R19_H, R19);
  648 
   649 // 64 bit registers that can be read and written, i.e. these registers
  650 // can be dest (or src) of normal instructions.
  651 reg_class bits64_reg_rw(
  652 /*R0_H,  R0*/     // R0
  653 /*R1_H,  R1*/     // SP
  654   R2_H,  R2,      // TOC
  655   R3_H,  R3,
  656   R4_H,  R4,
  657   R5_H,  R5,
  658   R6_H,  R6,
  659   R7_H,  R7,
  660   R8_H,  R8,
  661   R9_H,  R9,
  662   R10_H, R10,
  663   R11_H, R11,
  664   R12_H, R12,
  665 /*R13_H, R13*/   // system thread id
  666   R14_H, R14,
  667   R15_H, R15,
  668 /*R16_H, R16*/   // R16_thread
  669   R17_H, R17,
  670   R18_H, R18,
  671   R19_H, R19,
  672   R20_H, R20,
  673   R21_H, R21,
  674   R22_H, R22,
  675   R23_H, R23,
  676   R24_H, R24,
  677   R25_H, R25,
  678   R26_H, R26,
  679   R27_H, R27,
  680   R28_H, R28,
  681 /*R29_H, R29,*/
  682   R30_H, R30,
  683   R31_H, R31
  684 );
  685 
  686 // 64 bit registers used excluding r2, r11 and r12
  687 // Used to hold the TOC to avoid collisions with expanded LeafCall which uses
  688 // r2, r11 and r12 internally.
  689 reg_class bits64_reg_leaf_call(
  690 /*R0_H,  R0*/     // R0
  691 /*R1_H,  R1*/     // SP
  692 /*R2_H,  R2*/     // TOC
  693   R3_H,  R3,
  694   R4_H,  R4,
  695   R5_H,  R5,
  696   R6_H,  R6,
  697   R7_H,  R7,
  698   R8_H,  R8,
  699   R9_H,  R9,
  700   R10_H, R10,
  701 /*R11_H, R11*/
  702 /*R12_H, R12*/
  703 /*R13_H, R13*/   // system thread id
  704   R14_H, R14,
  705   R15_H, R15,
  706 /*R16_H, R16*/   // R16_thread
  707   R17_H, R17,
  708   R18_H, R18,
  709   R19_H, R19,
  710   R20_H, R20,
  711   R21_H, R21,
  712   R22_H, R22,
  713   R23_H, R23,
  714   R24_H, R24,
  715   R25_H, R25,
  716   R26_H, R26,
  717   R27_H, R27,
  718   R28_H, R28,
  719 /*R29_H, R29,*/
  720   R30_H, R30,
  721   R31_H, R31
  722 );
  723 
  724 // Used to hold the TOC to avoid collisions with expanded DynamicCall
  725 // which uses r19 as inline cache internally and expanded LeafCall which uses
  726 // r2, r11 and r12 internally.
  727 reg_class bits64_constant_table_base(
  728 /*R0_H,  R0*/     // R0
  729 /*R1_H,  R1*/     // SP
  730 /*R2_H,  R2*/     // TOC
  731   R3_H,  R3,
  732   R4_H,  R4,
  733   R5_H,  R5,
  734   R6_H,  R6,
  735   R7_H,  R7,
  736   R8_H,  R8,
  737   R9_H,  R9,
  738   R10_H, R10,
  739 /*R11_H, R11*/
  740 /*R12_H, R12*/
  741 /*R13_H, R13*/   // system thread id
  742   R14_H, R14,
  743   R15_H, R15,
  744 /*R16_H, R16*/   // R16_thread
  745   R17_H, R17,
  746   R18_H, R18,
  747 /*R19_H, R19*/
  748   R20_H, R20,
  749   R21_H, R21,
  750   R22_H, R22,
  751   R23_H, R23,
  752   R24_H, R24,
  753   R25_H, R25,
  754   R26_H, R26,
  755   R27_H, R27,
  756   R28_H, R28,
  757 /*R29_H, R29,*/
  758   R30_H, R30,
  759   R31_H, R31
  760 );
  761 
   762 // 64 bit registers that can only be read, i.e. these registers can
   763 // only be used as the src of instructions.
  764 reg_class bits64_reg_ro(
  765 /*R0_H,  R0*/     // R0
  766   R1_H,  R1,
  767   R2_H,  R2,       // TOC
  768   R3_H,  R3,
  769   R4_H,  R4,
  770   R5_H,  R5,
  771   R6_H,  R6,
  772   R7_H,  R7,
  773   R8_H,  R8,
  774   R9_H,  R9,
  775   R10_H, R10,
  776   R11_H, R11,
  777   R12_H, R12,
  778 /*R13_H, R13*/   // system thread id
  779   R14_H, R14,
  780   R15_H, R15,
  781   R16_H, R16,    // R16_thread
  782   R17_H, R17,
  783   R18_H, R18,
  784   R19_H, R19,
  785   R20_H, R20,
  786   R21_H, R21,
  787   R22_H, R22,
  788   R23_H, R23,
  789   R24_H, R24,
  790   R25_H, R25,
  791   R26_H, R26,
  792   R27_H, R27,
  793   R28_H, R28,
  794 /*R29_H, R29,*/ // TODO: let allocator handle TOC!!
  795   R30_H, R30,
  796   R31_H, R31
  797 );
  798 
  799 
  800 // ----------------------------
  801 // Special Class for Condition Code Flags Register
  802 
  803 reg_class int_flags(
  804 /*CCR0*/             // scratch
  805 /*CCR1*/             // scratch
  806 /*CCR2*/             // nv!
  807 /*CCR3*/             // nv!
  808 /*CCR4*/             // nv!
  809   CCR5,
  810   CCR6,
  811   CCR7
  812 );
  813 
  814 reg_class int_flags_ro(
  815   CCR0,
  816   CCR1,
  817   CCR2,
  818   CCR3,
  819   CCR4,
  820   CCR5,
  821   CCR6,
  822   CCR7
  823 );
  824 
  825 reg_class int_flags_CR0(CCR0);
  826 reg_class int_flags_CR1(CCR1);
  827 reg_class int_flags_CR6(CCR6);
  828 reg_class ctr_reg(SR_CTR);
  829 
  830 // ----------------------------
  831 // Float Register Classes
  832 // ----------------------------
  833 
  834 reg_class flt_reg(
  835   F0,
  836   F1,
  837   F2,
  838   F3,
  839   F4,
  840   F5,
  841   F6,
  842   F7,
  843   F8,
  844   F9,
  845   F10,
  846   F11,
  847   F12,
  848   F13,
  849   F14,              // nv!
  850   F15,              // nv!
  851   F16,              // nv!
  852   F17,              // nv!
  853   F18,              // nv!
  854   F19,              // nv!
  855   F20,              // nv!
  856   F21,              // nv!
  857   F22,              // nv!
  858   F23,              // nv!
  859   F24,              // nv!
  860   F25,              // nv!
  861   F26,              // nv!
  862   F27,              // nv!
  863   F28,              // nv!
  864   F29,              // nv!
  865   F30,              // nv!
  866   F31               // nv!
  867 );
  868 
  869 // Double precision float registers have virtual `high halves' that
  870 // are needed by the allocator.
  871 reg_class dbl_reg(
  872   F0,  F0_H,
  873   F1,  F1_H,
  874   F2,  F2_H,
  875   F3,  F3_H,
  876   F4,  F4_H,
  877   F5,  F5_H,
  878   F6,  F6_H,
  879   F7,  F7_H,
  880   F8,  F8_H,
  881   F9,  F9_H,
  882   F10, F10_H,
  883   F11, F11_H,
  884   F12, F12_H,
  885   F13, F13_H,
  886   F14, F14_H,    // nv!
  887   F15, F15_H,    // nv!
  888   F16, F16_H,    // nv!
  889   F17, F17_H,    // nv!
  890   F18, F18_H,    // nv!
  891   F19, F19_H,    // nv!
  892   F20, F20_H,    // nv!
  893   F21, F21_H,    // nv!
  894   F22, F22_H,    // nv!
  895   F23, F23_H,    // nv!
  896   F24, F24_H,    // nv!
  897   F25, F25_H,    // nv!
  898   F26, F26_H,    // nv!
  899   F27, F27_H,    // nv!
  900   F28, F28_H,    // nv!
  901   F29, F29_H,    // nv!
  902   F30, F30_H,    // nv!
  903   F31, F31_H     // nv!
  904 );
  905 
  906 // ----------------------------
  907 // Vector-Scalar Register Class
  908 // ----------------------------
  909 
  910 reg_class vs_reg(
  911   // Attention: Only these ones are saved & restored at safepoint by RegisterSaver.
  912   VSR32,
  913   VSR33,
  914   VSR34,
  915   VSR35,
  916   VSR36,
  917   VSR37,
  918   VSR38,
  919   VSR39,
  920   VSR40,
  921   VSR41,
  922   VSR42,
  923   VSR43,
  924   VSR44,
  925   VSR45,
  926   VSR46,
  927   VSR47,
  928   VSR48,
  929   VSR49,
  930   VSR50,
  931   VSR51
  932   // VSR52-VSR63 // nv!
  933 );
  934 
  935  %}
  936 
  937 //----------DEFINITION BLOCK---------------------------------------------------
  938 // Define name --> value mappings to inform the ADLC of an integer valued name
  939 // Current support includes integer values in the range [0, 0x7FFFFFFF]
  940 // Format:
  941 //        int_def  <name>         ( <int_value>, <expression>);
  942 // Generated Code in ad_<arch>.hpp
  943 //        #define  <name>   (<expression>)
  944 //        // value == <int_value>
  945 // Generated code in ad_<arch>.cpp adlc_verification()
  946 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
  947 //
  948 definitions %{
  949   // The default cost (of an ALU instruction).
  950   int_def DEFAULT_COST_LOW        (     30,      30);
  951   int_def DEFAULT_COST            (    100,     100);
  952   int_def HUGE_COST               (1000000, 1000000);
  953 
  954   // Memory refs
  955   int_def MEMORY_REF_COST_LOW     (    200, DEFAULT_COST * 2);
  956   int_def MEMORY_REF_COST         (    300, DEFAULT_COST * 3);
  957 
  958   // Branches are even more expensive.
  959   int_def BRANCH_COST             (    900, DEFAULT_COST * 9);
  960   int_def CALL_COST               (   1300, DEFAULT_COST * 13);
  961 %}
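// The names defined above are referenced from the instruct rules later in
// this file; for example, a memory access typically declares
// ins_cost(MEMORY_REF_COST) and a branch ins_cost(BRANCH_COST). The literal
// value and the expression of each int_def must stay consistent, which the
// generated adlc_verification() asserts.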
  962 
  963 
  964 //----------SOURCE BLOCK-------------------------------------------------------
  965 // This is a block of C++ code which provides values, functions, and
  966 // definitions necessary in the rest of the architecture description.
  967 source_hpp %{
  968   // Header information of the source block.
  969   // Method declarations/definitions which are used outside
  970   // the ad-scope can conveniently be defined here.
  971   //
  972   // To keep related declarations/definitions/uses close together,
   973 // we switch between source %{ %} and source_hpp %{ %} freely as needed.
  974 
  975 #include "opto/convertnode.hpp"
  976 
  977   // Returns true if Node n is followed by a MemBar node that
  978   // will do an acquire. If so, this node must not do the acquire
  979   // operation.
  980   bool followed_by_acquire(const Node *n);
  981 %}
  982 
  983 source %{
  984 
  985 #include "oops/klass.inline.hpp"
  986 
  987 void PhaseOutput::pd_perform_mach_node_analysis() {
  988 }
  989 
  990 int MachNode::pd_alignment_required() const {
  991   return 1;
  992 }
  993 
  994 int MachNode::compute_padding(int current_offset) const {
  995   return 0;
  996 }
  997 
  998 // Should the matcher clone input 'm' of node 'n'?
  999 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 1000   return false;
 1001 }
 1002 
 1003 // Should the Matcher clone shifts on addressing modes, expecting them
 1004 // to be subsumed into complex addressing expressions or compute them
 1005 // into registers?
 1006 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 1007   return clone_base_plus_offset_address(m, mstack, address_visited);
 1008 }
 1009 
 1010 // Optimize load-acquire.
 1011 //
  1012 // Check if the acquire is unnecessary because a following operation
  1013 // performs an acquire anyway.
 1014 // Walk the pattern:
 1015 //
 1016 //      n: Load.acq
 1017 //           |
 1018 //      MemBarAcquire
 1019 //       |         |
 1020 //  Proj(ctrl)  Proj(mem)
 1021 //       |         |
 1022 //   MemBarRelease/Volatile
 1023 //
 1024 bool followed_by_acquire(const Node *load) {
 1025   assert(load->is_Load(), "So far implemented only for loads.");
 1026 
 1027   // Find MemBarAcquire.
 1028   const Node *mba = NULL;
 1029   for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
 1030     const Node *out = load->fast_out(i);
 1031     if (out->Opcode() == Op_MemBarAcquire) {
 1032       if (out->in(0) == load) continue; // Skip control edge, membar should be found via precedence edge.
 1033       mba = out;
 1034       break;
 1035     }
 1036   }
 1037   if (!mba) return false;
 1038 
 1039   // Find following MemBar node.
 1040   //
 1041   // The following node must be reachable by control AND memory
 1042   // edge to assure no other operations are in between the two nodes.
 1043   //
 1044   // So first get the Proj node, mem_proj, to use it to iterate forward.
 1045   Node *mem_proj = NULL;
 1046   for (DUIterator_Fast imax, i = mba->fast_outs(imax); i < imax; i++) {
 1047     mem_proj = mba->fast_out(i);      // Runs out of bounds and asserts if Proj not found.
 1048     assert(mem_proj->is_Proj(), "only projections here");
 1049     ProjNode *proj = mem_proj->as_Proj();
 1050     if (proj->_con == TypeFunc::Memory &&
 1051         !Compile::current()->node_arena()->contains(mem_proj)) // Unmatched old-space only
 1052       break;
 1053   }
 1054   assert(mem_proj->as_Proj()->_con == TypeFunc::Memory, "Graph broken");
 1055 
 1056   // Search MemBar behind Proj. If there are other memory operations
 1057   // behind the Proj we lost.
 1058   for (DUIterator_Fast jmax, j = mem_proj->fast_outs(jmax); j < jmax; j++) {
 1059     Node *x = mem_proj->fast_out(j);
 1060     // Proj might have an edge to a store or load node which precedes the membar.
 1061     if (x->is_Mem()) return false;
 1062 
 1063     // On PPC64 release and volatile are implemented by an instruction
 1064     // that also has acquire semantics. I.e. there is no need for an
 1065     // acquire before these.
 1066     int xop = x->Opcode();
 1067     if (xop == Op_MemBarRelease || xop == Op_MemBarVolatile) {
 1068       // Make sure we're not missing Call/Phi/MergeMem by checking
 1069       // control edges. The control edge must directly lead back
 1070       // to the MemBarAcquire
 1071       Node *ctrl_proj = x->in(0);
 1072       if (ctrl_proj->is_Proj() && ctrl_proj->in(0) == mba) {
 1073         return true;
 1074       }
 1075     }
 1076   }
 1077 
 1078   return false;
 1079 }
 1080 
 1081 #define __ _masm.
 1082 
 1083 // Tertiary op of a LoadP or StoreP encoding.
 1084 #define REGP_OP true
 1085 
 1086 // ****************************************************************************
 1087 
 1088 // REQUIRED FUNCTIONALITY
 1089 
  1090 // !!!!! Special hack to get all types of calls to specify the byte offset
 1091 //       from the start of the call to the point where the return address
 1092 //       will point.
 1093 
 1094 // PPC port: Removed use of lazy constant construct.
 1095 
 1096 int MachCallStaticJavaNode::ret_addr_offset() {
 1097   // It's only a single branch-and-link instruction.
 1098   return 4;
 1099 }
 1100 
 1101 int MachCallDynamicJavaNode::ret_addr_offset() {
 1102   // Offset is 4 with postalloc expanded calls (bl is one instruction). We use
 1103   // postalloc expanded calls if we use inline caches and do not update method data.
 1104   if (UseInlineCaches) return 4;
 1105 
 1106   int vtable_index = this->_vtable_index;
 1107   if (vtable_index < 0) {
 1108     // Must be invalid_vtable_index, not nonvirtual_vtable_index.
 1109     assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
 1110     return 12;
 1111   } else {
 1112     return 24 + MacroAssembler::instr_size_for_decode_klass_not_null();
 1113   }
 1114 }
 1115 
 1116 int MachCallRuntimeNode::ret_addr_offset() {
 1117   if (rule() == CallRuntimeDirect_rule) {
 1118     // CallRuntimeDirectNode uses call_c.
 1119 #if defined(ABI_ELFv2)
 1120     return 28;
 1121 #else
 1122     return 40;
 1123 #endif
 1124   }
 1125   assert(rule() == CallLeafDirect_rule, "unexpected node with rule %u", rule());
 1126   // CallLeafDirectNode uses bl.
 1127   return 4;
 1128 }
 1129 
 1130 int MachCallNativeNode::ret_addr_offset() {
 1131   Unimplemented();
 1132   return -1;
 1133 }
 1134 
 1135 //=============================================================================
 1136 
 1137 // condition code conversions
 1138 
 1139 static int cc_to_boint(int cc) {
 1140   return Assembler::bcondCRbiIs0 | (cc & 8);
 1141 }
 1142 
 1143 static int cc_to_inverse_boint(int cc) {
 1144   return Assembler::bcondCRbiIs0 | (8-(cc & 8));
 1145 }
 1146 
 1147 static int cc_to_biint(int cc, int flags_reg) {
 1148   return (flags_reg << 2) | (cc & 3);
 1149 }
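// Worked example (illustrative): the low two bits of cc select the bit within
// a CR field (LT=0, GT=1, EQ=2, SO=3) and bit 3 selects branch-on-set versus
// branch-on-clear. A branch-if-equal on CCR6 thus gets
// BI = cc_to_biint(cc, 6) = (6 << 2) | 2 = 26, while cc_to_boint(cc) returns
// the "CR bit is 1" BO encoding whenever bit 3 of cc is set and
// cc_to_inverse_boint(cc) returns the opposite one.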
 1150 
 1151 //=============================================================================
 1152 
 1153 // Compute padding required for nodes which need alignment. The padding
 1154 // is the number of bytes (not instructions) which will be inserted before
 1155 // the instruction. The padding must match the size of a NOP instruction.
 1156 
 1157 // Add nop if a prefixed (two-word) instruction is going to cross a 64-byte boundary.
 1158 // (See Section 1.6 of Power ISA Version 3.1)
 1159 static int compute_prefix_padding(int current_offset) {
 1160   assert(PowerArchitecturePPC64 >= 10 && (CodeEntryAlignment & 63) == 0,
 1161          "Code buffer must be aligned to a multiple of 64 bytes");
 1162   if (is_aligned(current_offset + BytesPerInstWord, 64)) {
 1163     return BytesPerInstWord;
 1164   }
 1165   return 0;
 1166 }
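// Worked example (illustrative): at current_offset == 60 the prefix word would
// occupy bytes 60..63 and the suffix word bytes 64..67, crossing a 64-byte
// boundary, so the function returns BytesPerInstWord (4) and a nop is emitted
// first; at current_offset == 56 both words fit below the boundary and no
// padding is required.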
 1167 
 1168 int loadConI32Node::compute_padding(int current_offset) const {
 1169   return compute_prefix_padding(current_offset);
 1170 }
 1171 
 1172 int loadConL34Node::compute_padding(int current_offset) const {
 1173   return compute_prefix_padding(current_offset);
 1174 }
 1175 
 1176 int addI_reg_imm32Node::compute_padding(int current_offset) const {
 1177   return compute_prefix_padding(current_offset);
 1178 }
 1179 
 1180 int addL_reg_imm34Node::compute_padding(int current_offset) const {
 1181   return compute_prefix_padding(current_offset);
 1182 }
 1183 
 1184 int addP_reg_imm34Node::compute_padding(int current_offset) const {
 1185   return compute_prefix_padding(current_offset);
 1186 }
 1187 
 1188 int cmprb_Whitespace_reg_reg_prefixedNode::compute_padding(int current_offset) const {
 1189   return compute_prefix_padding(current_offset);
 1190 }
 1191 
 1192 
 1193 //=============================================================================
 1194 
 1195 // Emit an interrupt that is caught by the debugger (for debugging compiler).
 1196 void emit_break(CodeBuffer &cbuf) {
 1197   C2_MacroAssembler _masm(&cbuf);
 1198   __ illtrap();
 1199 }
 1200 
 1201 #ifndef PRODUCT
 1202 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1203   st->print("BREAKPOINT");
 1204 }
 1205 #endif
 1206 
 1207 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1208   emit_break(cbuf);
 1209 }
 1210 
 1211 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
 1212   return MachNode::size(ra_);
 1213 }
 1214 
 1215 //=============================================================================
 1216 
 1217 void emit_nop(CodeBuffer &cbuf) {
 1218   C2_MacroAssembler _masm(&cbuf);
 1219   __ nop();
 1220 }
 1221 
 1222 static inline void emit_long(CodeBuffer &cbuf, int value) {
 1223   *((int*)(cbuf.insts_end())) = value;
 1224   cbuf.set_insts_end(cbuf.insts_end() + BytesPerInstWord);
 1225 }
 1226 
 1227 //=============================================================================
 1228 
 1229 %} // interrupt source
 1230 
 1231 source_hpp %{ // Header information of the source block.
 1232 
 1233 //--------------------------------------------------------------
 1234 //---<  Used for optimization in Compile::Shorten_branches  >---
 1235 //--------------------------------------------------------------
 1236 
 1237 class C2_MacroAssembler;
 1238 
 1239 class CallStubImpl {
 1240 
 1241  public:
 1242 
 1243   // Emit call stub, compiled java to interpreter.
 1244   static void emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset);
 1245 
 1246   // Size of call trampoline stub.
 1247   // This doesn't need to be accurate to the byte, but it
 1248   // must be larger than or equal to the real size of the stub.
 1249   static uint size_call_trampoline() {
 1250     return MacroAssembler::trampoline_stub_size;
 1251   }
 1252 
 1253   // number of relocations needed by a call trampoline stub
 1254   static uint reloc_call_trampoline() {
 1255     return 5;
 1256   }
 1257 
 1258 };
 1259 
 1260 %} // end source_hpp
 1261 
 1262 source %{
 1263 
 1264 // Emit a trampoline stub for a call to a target which is too far away.
 1265 //
 1266 // code sequences:
 1267 //
 1268 // call-site:
 1269 //   branch-and-link to <destination> or <trampoline stub>
 1270 //
 1271 // Related trampoline stub for this call-site in the stub section:
 1272 //   load the call target from the constant pool
 1273 //   branch via CTR (LR/link still points to the call-site above)
 1274 
 1275 void CallStubImpl::emit_trampoline_stub(C2_MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
 1276   address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset);
 1277   if (stub == NULL) {
 1278     ciEnv::current()->record_out_of_memory_failure();
 1279   }
 1280 }
 1281 
 1282 //=============================================================================
 1283 
 1284 // Emit an inline branch-and-link call and a related trampoline stub.
 1285 //
 1286 // code sequences:
 1287 //
 1288 // call-site:
 1289 //   branch-and-link to <destination> or <trampoline stub>
 1290 //
 1291 // Related trampoline stub for this call-site in the stub section:
 1292 //   load the call target from the constant pool
 1293 //   branch via CTR (LR/link still points to the call-site above)
 1294 //
 1295 
 1296 typedef struct {
 1297   int insts_call_instruction_offset;
 1298   int ret_addr_offset;
 1299 } EmitCallOffsets;
 1300 
 1301 // Emit a branch-and-link instruction that branches to a trampoline.
 1302 // - Remember the offset of the branch-and-link instruction.
 1303 // - Add a relocation at the branch-and-link instruction.
 1304 // - Emit a branch-and-link.
 1305 // - Remember the return pc offset.
 1306 EmitCallOffsets emit_call_with_trampoline_stub(C2_MacroAssembler &_masm, address entry_point, relocInfo::relocType rtype) {
 1307   EmitCallOffsets offsets = { -1, -1 };
 1308   const int start_offset = __ offset();
 1309   offsets.insts_call_instruction_offset = __ offset();
 1310 
 1311   // No entry point given, use the current pc.
 1312   if (entry_point == NULL) entry_point = __ pc();
 1313 
 1314   // Put the entry point as a constant into the constant pool.
 1315   const address entry_point_toc_addr   = __ address_constant(entry_point, RelocationHolder::none);
 1316   if (entry_point_toc_addr == NULL) {
 1317     ciEnv::current()->record_out_of_memory_failure();
 1318     return offsets;
 1319   }
 1320   const int     entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 1321 
 1322   // Emit the trampoline stub which will be related to the branch-and-link below.
 1323   CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, offsets.insts_call_instruction_offset);
 1324   if (ciEnv::current()->failing()) { return offsets; } // Code cache may be full.
 1325   __ relocate(rtype);
 1326 
 1327   // Note: At this point we do not have the address of the trampoline
 1328   // stub, and the entry point might be too far away for bl, so __ pc()
 1329   // serves as dummy and the bl will be patched later.
 1330   __ bl((address) __ pc());
 1331 
 1332   offsets.ret_addr_offset = __ offset() - start_offset;
 1333 
 1334   return offsets;
 1335 }
 1336 
 1337 //=============================================================================
 1338 
 1339 // Factory for creating loadConL* nodes for large/small constant pool.
 1340 
 1341 static inline jlong replicate_immF(float con) {
 1342   // Replicate float con 2 times and pack into vector.
 1343   int val = *((int*)&con);
 1344   jlong lval = val;
 1345   lval = (lval << 32) | (lval & 0xFFFFFFFFl);
 1346   return lval;
 1347 }
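// Worked example (illustrative): for con == 1.0f the bit pattern is 0x3F800000,
// so replicate_immF returns 0x3F8000003F800000, i.e. the same single-precision
// value in both 32-bit halves of the doubleword.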
 1348 
 1349 //=============================================================================
 1350 
 1351 const RegMask& MachConstantBaseNode::_out_RegMask = BITS64_CONSTANT_TABLE_BASE_mask();
 1352 int ConstantTable::calculate_table_base_offset() const {
 1353   return 0;  // absolute addressing, no offset
 1354 }
 1355 
 1356 bool MachConstantBaseNode::requires_postalloc_expand() const { return true; }
 1357 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1358   iRegPdstOper *op_dst = new iRegPdstOper();
 1359   MachNode *m1 = new loadToc_hiNode();
 1360   MachNode *m2 = new loadToc_loNode();
 1361 
 1362   m1->add_req(NULL);
 1363   m2->add_req(NULL, m1);
 1364   m1->_opnds[0] = op_dst;
 1365   m2->_opnds[0] = op_dst;
 1366   m2->_opnds[1] = op_dst;
 1367   ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 1368   ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 1369   nodes->push(m1);
 1370   nodes->push(m2);
 1371 }
 1372 
 1373 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 1374   // Is postalloc expanded.
 1375   ShouldNotReachHere();
 1376 }
 1377 
 1378 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1379   return 0;
 1380 }
 1381 
 1382 #ifndef PRODUCT
 1383 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1384   st->print("-- \t// MachConstantBaseNode (empty encoding)");
 1385 }
 1386 #endif
 1387 
 1388 //=============================================================================
 1389 
 1390 #ifndef PRODUCT
 1391 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1392   Compile* C = ra_->C;
 1393   const long framesize = C->output()->frame_slots() << LogBytesPerInt;
 1394 
 1395   st->print("PROLOG\n\t");
 1396   if (C->output()->need_stack_bang(framesize)) {
 1397     st->print("stack_overflow_check\n\t");
 1398   }
 1399 
 1400   if (!false /* TODO: PPC port C->is_frameless_method()*/) {
 1401     st->print("save return pc\n\t");
 1402     st->print("push frame %ld\n\t", -framesize);
 1403   }
 1404 
 1405   if (C->stub_function() == NULL) {
 1406     st->print("nmethod entry barrier\n\t");
 1407   }
 1408 }
 1409 #endif
 1410 
 1411 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1412   Compile* C = ra_->C;
 1413   C2_MacroAssembler _masm(&cbuf);
 1414 
 1415   const long framesize = C->output()->frame_size_in_bytes();
 1416   assert(framesize % (2 * wordSize) == 0, "must preserve 2*wordSize alignment");
 1417 
 1418   const bool method_is_frameless      = false /* TODO: PPC port C->is_frameless_method()*/;
 1419 
 1420   const Register return_pc            = R20; // Must match return_addr() in frame section.
 1421   const Register callers_sp           = R21;
 1422   const Register push_frame_temp      = R22;
 1423   const Register toc_temp             = R23;
 1424   assert_different_registers(R11, return_pc, callers_sp, push_frame_temp, toc_temp);
 1425 
 1426   if (method_is_frameless) {
 1427     // Add nop at beginning of all frameless methods to prevent any
 1428     // oop instructions from getting overwritten by make_not_entrant
 1429     // (patching attempt would fail).
 1430     __ nop();
 1431   } else {
 1432     // Get return pc.
 1433     __ mflr(return_pc);
 1434   }
 1435 
 1436   if (C->clinit_barrier_on_entry()) {
 1437     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1438 
 1439     Label L_skip_barrier;
 1440     Register klass = toc_temp;
 1441 
 1442     // Notify OOP recorder (don't need the relocation)
 1443     AddressLiteral md = __ constant_metadata_address(C->method()->holder()->constant_encoding());
 1444     __ load_const_optimized(klass, md.value(), R0);
 1445     __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
 1446 
 1447     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
 1448     __ mtctr(klass);
 1449     __ bctr();
 1450 
 1451     __ bind(L_skip_barrier);
 1452   }
 1453 
 1454   // Calls to C2R adapters often do not accept exceptional returns.
 1455   // We require that their callers must bang for them. But be
 1456   // careful, because some VM calls (such as call site linkage) can
 1457   // use several kilobytes of stack. But the stack safety zone should
 1458   // account for that. See bugs 4446381, 4468289, 4497237.
 1459 
 1460   int bangsize = C->output()->bang_size_in_bytes();
 1461   assert(bangsize >= framesize || bangsize <= 0, "stack bang size incorrect");
 1462   if (C->output()->need_stack_bang(bangsize)) {
 1463     // Unfortunately we cannot use the function provided in
 1464     // assembler.cpp as we have to emulate the pipes. So I had to
 1465     // insert the code of generate_stack_overflow_check(), see
 1466     // assembler.cpp for some illuminative comments.
 1467     const int page_size = os::vm_page_size();
 1468     int bang_end = StackOverflow::stack_shadow_zone_size();
 1469 
 1470     // This is how far the previous frame's stack banging extended.
 1471     const int bang_end_safe = bang_end;
 1472 
 1473     if (bangsize > page_size) {
 1474       bang_end += bangsize;
 1475     }
 1476 
 1477     int bang_offset = bang_end_safe;
 1478 
 1479     while (bang_offset <= bang_end) {
 1480       // Need at least one stack bang at end of shadow zone.
 1481 
 1482       // Again I had to copy code, this time from assembler_ppc.cpp,
 1483       // bang_stack_with_offset - see there for comments.
 1484 
 1485       // Stack grows down, caller passes positive offset.
 1486       assert(bang_offset > 0, "must bang with positive offset");
 1487 
 1488       long stdoffset = -bang_offset;
 1489 
 1490       if (Assembler::is_simm(stdoffset, 16)) {
 1491         // Signed 16 bit offset, a simple std is ok.
 1492         if (UseLoadInstructionsForStackBangingPPC64) {
 1493           __ ld(R0,  (int)(signed short)stdoffset, R1_SP);
 1494         } else {
 1495           __ std(R0, (int)(signed short)stdoffset, R1_SP);
 1496         }
 1497       } else if (Assembler::is_simm(stdoffset, 31)) {
 1498         // Use largeoffset calculations for addis & ld/std.
 1499         const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
 1500         const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);
 1501 
 1502         Register tmp = R11;
 1503         __ addis(tmp, R1_SP, hi);
 1504         if (UseLoadInstructionsForStackBangingPPC64) {
 1505           __ ld(R0, lo, tmp);
 1506         } else {
 1507           __ std(R0, lo, tmp);
 1508         }
 1509       } else {
 1510         ShouldNotReachHere();
 1511       }
 1512 
 1513       bang_offset += page_size;
 1514     }
 1515     // R11 trashed
 1516   } // C->output()->need_stack_bang(framesize)
 1517 
 1518   unsigned int bytes = (unsigned int)framesize;
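        // Round the amount to push up to frame::alignment_in_bytes (the
        // assert above already guarantees 2*wordSize alignment).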
 1519   long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
 1520   ciMethod *currMethod = C->method();
 1521 
 1522   if (!method_is_frameless) {
 1523     // Get callers sp.
 1524     __ mr(callers_sp, R1_SP);
 1525 
 1526     // Push method's frame, modifies SP.
 1527     assert(Assembler::is_uimm(framesize, 32U), "wrong type");
 1528     // The ABI is already accounted for in 'framesize' via the
 1529     // 'out_preserve' area.
 1530     Register tmp = push_frame_temp;
 1531     // Had to insert code of push_frame((unsigned int)framesize, push_frame_temp).
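          // stdu reaches only 16-bit signed displacements; for larger frames the
          // negative offset is materialized in push_frame_temp and stdux is used.
          // Either form stores the back chain and updates SP in one instruction.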
 1532     if (Assembler::is_simm(-offset, 16)) {
 1533       __ stdu(R1_SP, -offset, R1_SP);
 1534     } else {
 1535       long x = -offset;
 1536       // Had to insert load_const(tmp, -offset).
 1537       __ lis( tmp, (int)((signed short)(((x >> 32) & 0xffff0000) >> 16)));
 1538       __ ori( tmp, tmp, ((x >> 32) & 0x0000ffff));
 1539       __ sldi(tmp, tmp, 32);
 1540       __ oris(tmp, tmp, (x & 0xffff0000) >> 16);
 1541       __ ori( tmp, tmp, (x & 0x0000ffff));
 1542 
 1543       __ stdux(R1_SP, R1_SP, tmp);
 1544     }
 1545   }
 1546 #if 0 // TODO: PPC port
 1547   // For testing large constant pools, emit a lot of constants to constant pool.
 1548   // "Randomize" const_size.
 1549   if (ConstantsALot) {
 1550     const int num_consts = const_size();
 1551     for (int i = 0; i < num_consts; i++) {
 1552       __ long_constant(0xB0B5B00BBABE);
 1553     }
 1554   }
 1555 #endif
 1556   if (!method_is_frameless) {
 1557     // Save return pc.
 1558     __ std(return_pc, _abi0(lr), callers_sp);
 1559   }
 1560 
 1561   if (C->stub_function() == NULL) {
 1562     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 1563     bs->nmethod_entry_barrier(&_masm, push_frame_temp);
 1564   }
 1565 
 1566   C->output()->set_frame_complete(cbuf.insts_size());
 1567 }
 1568 
 1569 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 1570   // Variable size. Determine dynamically.
 1571   return MachNode::size(ra_);
 1572 }
 1573 
 1574 int MachPrologNode::reloc() const {
 1575   // Return number of relocatable values contained in this instruction.
 1576   return 1; // 1 reloc entry for load_const(toc).
 1577 }
 1578 
 1579 //=============================================================================
 1580 
 1581 #ifndef PRODUCT
 1582 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1583   Compile* C = ra_->C;
 1584 
 1585   st->print("EPILOG\n\t");
 1586   st->print("restore return pc\n\t");
 1587   st->print("pop frame\n\t");
 1588 
 1589   if (do_polling() && C->is_method_compilation()) {
 1590     st->print("safepoint poll\n\t");
 1591   }
 1592 }
 1593 #endif
 1594 
 1595 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1596   Compile* C = ra_->C;
 1597   C2_MacroAssembler _masm(&cbuf);
 1598 
 1599   const long framesize = ((long)C->output()->frame_slots()) << LogBytesPerInt;
 1600   assert(framesize >= 0, "negative frame-size?");
 1601 
 1602   const bool method_needs_polling = do_polling() && C->is_method_compilation();
 1603   const bool method_is_frameless  = false /* TODO: PPC port C->is_frameless_method()*/;
 1604   const Register return_pc        = R31;  // Must survive C-call to enable_stack_reserved_zone().
 1605   const Register temp             = R12;
 1606 
 1607   if (!method_is_frameless) {
 1608     // Restore return pc relative to callers' sp.
 1609     __ ld(return_pc, ((int)framesize) + _abi0(lr), R1_SP);
 1610     // Move return pc to LR.
 1611     __ mtlr(return_pc);
 1612     // Pop frame (fixed frame-size).
 1613     __ addi(R1_SP, R1_SP, (int)framesize);
 1614   }
 1615 
 1616   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1617     __ reserved_stack_check(return_pc);
 1618   }
 1619 
 1620   if (method_needs_polling) {
 1621     Label dummy_label;
 1622     Label* code_stub = &dummy_label;
 1623     if (!UseSIGTRAP && !C->output()->in_scratch_emit_size()) {
 1624       code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
 1625       __ relocate(relocInfo::poll_return_type);
 1626     }
 1627     __ safepoint_poll(*code_stub, temp, true /* at_return */, true /* in_nmethod */);
 1628   }
 1629 }
 1630 
 1631 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 1632   // Variable size. Determine dynamically.
 1633   return MachNode::size(ra_);
 1634 }
 1635 
 1636 int MachEpilogNode::reloc() const {
 1637   // Return number of relocatable values contained in this instruction.
 1638   return 1; // 1 for load_from_polling_page.
 1639 }
 1640 
 1641 const Pipeline * MachEpilogNode::pipeline() const {
 1642   return MachNode::pipeline_class();
 1643 }
 1644 
 1645 // =============================================================================
 1646 
 1647 // Figure out which register class each belongs in: rc_int, rc_float, rc_vs or
 1648 // rc_stack.
 1649 enum RC { rc_bad, rc_int, rc_float, rc_vs, rc_stack };
 1650 
 1651 static enum RC rc_class(OptoReg::Name reg) {
 1652   // Return the register class for the given register. The given register
 1653   // reg is a <register>_num value, which is an index into the MachRegisterNumbers
 1654   // enumeration in adGlobals_ppc.hpp.
 1655 
 1656   if (reg == OptoReg::Bad) return rc_bad;
 1657 
 1658   // We have 64 integer register halves, starting at index 0.
 1659   if (reg < 64) return rc_int;
 1660 
 1661   // We have 64 floating-point register halves, starting at index 64.
 1662   if (reg < 64+64) return rc_float;
 1663 
 1664   // We have 64 vector-scalar registers, starting at index 128.
 1665   if (reg < 64+64+64) return rc_vs;
 1666 
 1667   // Between the vector-scalar regs & stack are the flags regs.
 1668   assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags");
 1669 
 1670   return rc_stack;
 1671 }
 1672 
 1673 static int ld_st_helper(CodeBuffer *cbuf, const char *op_str, uint opcode, int reg, int offset,
 1674                         bool do_print, Compile* C, outputStream *st) {
 1675 
 1676   assert(opcode == Assembler::LD_OPCODE   ||
 1677          opcode == Assembler::STD_OPCODE  ||
 1678          opcode == Assembler::LWZ_OPCODE  ||
 1679          opcode == Assembler::STW_OPCODE  ||
 1680          opcode == Assembler::LFD_OPCODE  ||
 1681          opcode == Assembler::STFD_OPCODE ||
 1682          opcode == Assembler::LFS_OPCODE  ||
 1683          opcode == Assembler::STFS_OPCODE,
 1684          "opcode not supported");
 1685 
 1686   if (cbuf) {
 1687     int d =
 1688       (Assembler::LD_OPCODE == opcode || Assembler::STD_OPCODE == opcode) ?
 1689         Assembler::ds(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/)
 1690       : Assembler::d1(offset+0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/); // Makes no difference in opt build.
 1691     emit_long(*cbuf, opcode | Assembler::rt(Matcher::_regEncode[reg]) | d | Assembler::ra(R1_SP));
 1692   }
 1693 #ifndef PRODUCT
 1694   else if (do_print) {
 1695     st->print("%-7s %s, [R1_SP + #%d+%d] \t// spill copy",
 1696               op_str,
 1697               Matcher::regName[reg],
 1698               offset, 0 /* TODO: PPC port C->frame_slots_sp_bias_in_bytes()*/);
 1699   }
 1700 #endif
 1701   return 4; // size
 1702 }
 1703 
 1704 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
 1705   Compile* C = ra_->C;
 1706 
 1707   // Get registers to move.
 1708   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
 1709   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
 1710   OptoReg::Name dst_hi = ra_->get_reg_second(this);
 1711   OptoReg::Name dst_lo = ra_->get_reg_first(this);
 1712 
 1713   enum RC src_hi_rc = rc_class(src_hi);
 1714   enum RC src_lo_rc = rc_class(src_lo);
 1715   enum RC dst_hi_rc = rc_class(dst_hi);
 1716   enum RC dst_lo_rc = rc_class(dst_lo);
 1717 
 1718   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
 1719   if (src_hi != OptoReg::Bad)
 1720     assert((src_lo&1)==0 && src_lo+1==src_hi &&
 1721            (dst_lo&1)==0 && dst_lo+1==dst_hi,
 1722            "expected aligned-adjacent pairs");
 1723   // Generate spill code!
 1724   int size = 0;
 1725 
 1726   if (src_lo == dst_lo && src_hi == dst_hi)
 1727     return size;            // Self copy, no move.
 1728 
 1729   if (bottom_type()->isa_vect() != NULL && ideal_reg() == Op_VecX) {
 1730     // Memory->Memory Spill.
 1731     if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 1732       int src_offset = ra_->reg2offset(src_lo);
 1733       int dst_offset = ra_->reg2offset(dst_lo);
 1734       if (cbuf) {
 1735         C2_MacroAssembler _masm(cbuf);
 1736         __ ld(R0, src_offset, R1_SP);
 1737         __ std(R0, dst_offset, R1_SP);
 1738         __ ld(R0, src_offset+8, R1_SP);
 1739         __ std(R0, dst_offset+8, R1_SP);
 1740       }
 1741       size += 16;
 1742     }
 1743     // VectorSRegister->Memory Spill.
 1744     else if (src_lo_rc == rc_vs && dst_lo_rc == rc_stack) {
 1745       VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
 1746       int dst_offset = ra_->reg2offset(dst_lo);
 1747       if (cbuf) {
 1748         C2_MacroAssembler _masm(cbuf);
 1749         __ addi(R0, R1_SP, dst_offset);
 1750         __ stxvd2x(Rsrc, R0);
 1751       }
 1752       size += 8;
 1753     }
 1754     // Memory->VectorSRegister Spill.
 1755     else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vs) {
 1756       VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
 1757       int src_offset = ra_->reg2offset(src_lo);
 1758       if (cbuf) {
 1759         C2_MacroAssembler _masm(cbuf);
 1760         __ addi(R0, R1_SP, src_offset);
 1761         __ lxvd2x(Rdst, R0);
 1762       }
 1763       size += 8;
 1764     }
 1765     // VectorSRegister->VectorSRegister.
 1766     else if (src_lo_rc == rc_vs && dst_lo_rc == rc_vs) {
 1767       VectorSRegister Rsrc = as_VectorSRegister(Matcher::_regEncode[src_lo]);
 1768       VectorSRegister Rdst = as_VectorSRegister(Matcher::_regEncode[dst_lo]);
 1769       if (cbuf) {
 1770         C2_MacroAssembler _masm(cbuf);
 1771         __ xxlor(Rdst, Rsrc, Rsrc);
 1772       }
 1773       size += 4;
 1774     }
 1775     else {
 1776       ShouldNotReachHere(); // No VSR spill.
 1777     }
 1778     return size;
 1779   }
 1780 
 1781   // --------------------------------------
 1782   // Memory->Memory Spill. Use R0 to hold the value.
 1783   if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
 1784     int src_offset = ra_->reg2offset(src_lo);
 1785     int dst_offset = ra_->reg2offset(dst_lo);
 1786     if (src_hi != OptoReg::Bad) {
 1787       assert(src_hi_rc==rc_stack && dst_hi_rc==rc_stack,
 1788              "expected same type of move for high parts");
 1789       size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE,  R0_num, src_offset, !do_size, C, st);
 1790       if (!cbuf && !do_size) st->print("\n\t");
 1791       size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, R0_num, dst_offset, !do_size, C, st);
 1792     } else {
 1793       size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, R0_num, src_offset, !do_size, C, st);
 1794       if (!cbuf && !do_size) st->print("\n\t");
 1795       size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, R0_num, dst_offset, !do_size, C, st);
 1796     }
 1797     return size;
 1798   }
 1799 
 1800   // --------------------------------------
 1801   // Check for float->int copy; requires a trip through memory.
 1802   if (src_lo_rc == rc_float && dst_lo_rc == rc_int) {
 1803     Unimplemented();
 1804   }
 1805 
 1806   // --------------------------------------
 1807   // Check for integer reg-reg copy.
 1808   if (src_lo_rc == rc_int && dst_lo_rc == rc_int) {
 1809       Register Rsrc = as_Register(Matcher::_regEncode[src_lo]);
 1810       Register Rdst = as_Register(Matcher::_regEncode[dst_lo]);
 1811       size = (Rsrc != Rdst) ? 4 : 0;
 1812 
 1813       if (cbuf) {
 1814         C2_MacroAssembler _masm(cbuf);
 1815         if (size) {
 1816           __ mr(Rdst, Rsrc);
 1817         }
 1818       }
 1819 #ifndef PRODUCT
 1820       else if (!do_size) {
 1821         if (size) {
 1822           st->print("%-7s %s, %s \t// spill copy", "MR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1823         } else {
 1824           st->print("%-7s %s, %s \t// spill copy", "MR-NOP", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1825         }
 1826       }
 1827 #endif
 1828       return size;
 1829   }
 1830 
 1831   // Check for integer store.
 1832   if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) {
 1833     int dst_offset = ra_->reg2offset(dst_lo);
 1834     if (src_hi != OptoReg::Bad) {
 1835       assert(src_hi_rc==rc_int && dst_hi_rc==rc_stack,
 1836              "expected same type of move for high parts");
 1837       size += ld_st_helper(cbuf, "STD ", Assembler::STD_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1838     } else {
 1839       size += ld_st_helper(cbuf, "STW ", Assembler::STW_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1840     }
 1841     return size;
 1842   }
 1843 
 1844   // Check for integer load.
 1845   if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) {
 1846     int src_offset = ra_->reg2offset(src_lo);
 1847     if (src_hi != OptoReg::Bad) {
 1848       assert(dst_hi_rc==rc_int && src_hi_rc==rc_stack,
 1849              "expected same type of move for high parts");
 1850       size += ld_st_helper(cbuf, "LD  ", Assembler::LD_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1851     } else {
 1852       size += ld_st_helper(cbuf, "LWZ ", Assembler::LWZ_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1853     }
 1854     return size;
 1855   }
 1856 
 1857   // Check for float reg-reg copy.
 1858   if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
 1859     if (cbuf) {
 1860       C2_MacroAssembler _masm(cbuf);
 1861       FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]);
 1862       FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]);
 1863       __ fmr(Rdst, Rsrc);
 1864     }
 1865 #ifndef PRODUCT
 1866     else if (!do_size) {
 1867       st->print("%-7s %s, %s \t// spill copy", "FMR", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 1868     }
 1869 #endif
 1870     return 4;
 1871   }
 1872 
 1873   // Check for float store.
 1874   if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
 1875     int dst_offset = ra_->reg2offset(dst_lo);
 1876     if (src_hi != OptoReg::Bad) {
 1877       assert(src_hi_rc==rc_float && dst_hi_rc==rc_stack,
 1878              "expected same type of move for high parts");
 1879       size += ld_st_helper(cbuf, "STFD", Assembler::STFD_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1880     } else {
 1881       size += ld_st_helper(cbuf, "STFS", Assembler::STFS_OPCODE, src_lo, dst_offset, !do_size, C, st);
 1882     }
 1883     return size;
 1884   }
 1885 
 1886   // Check for float load.
 1887   if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) {
 1888     int src_offset = ra_->reg2offset(src_lo);
 1889     if (src_hi != OptoReg::Bad) {
 1890       assert(dst_hi_rc==rc_float && src_hi_rc==rc_stack,
 1891              "expected same type of move for high parts");
 1892       size += ld_st_helper(cbuf, "LFD ", Assembler::LFD_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1893     } else {
 1894       size += ld_st_helper(cbuf, "LFS ", Assembler::LFS_OPCODE, dst_lo, src_offset, !do_size, C, st);
 1895     }
 1896     return size;
 1897   }
 1898 
 1899   // --------------------------------------------------------------------
 1900   // Check for hi bits still needing moving. Only happens for misaligned
 1901   // arguments to native calls.
 1902   if (src_hi == dst_hi)
 1903     return size;               // Self copy; no move.
 1904 
 1905   assert(src_hi_rc != rc_bad && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad");
 1906   ShouldNotReachHere(); // Unimplemented
 1907   return 0;
 1908 }
 1909 
 1910 #ifndef PRODUCT
 1911 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1912   if (!ra_)
 1913     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
 1914   else
 1915     implementation(NULL, ra_, false, st);
 1916 }
 1917 #endif
 1918 
 1919 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1920   implementation(&cbuf, ra_, false, NULL);
 1921 }
 1922 
 1923 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1924   return implementation(NULL, ra_, true, NULL);
 1925 }
 1926 
 1927 #ifndef PRODUCT
 1928 void MachNopNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1929   st->print("NOP \t// %d nops to pad for loops or prefixed instructions.", _count);
 1930 }
 1931 #endif
 1932 
 1933 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
 1934   C2_MacroAssembler _masm(&cbuf);
 1935   // _count contains the number of nops needed for padding.
 1936   for (int i = 0; i < _count; i++) {
 1937     __ nop();
 1938   }
 1939 }
 1940 
 1941 uint MachNopNode::size(PhaseRegAlloc *ra_) const {
 1942   return _count * 4;
 1943 }
 1944 
 1945 #ifndef PRODUCT
 1946 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1947   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1948   char reg_str[128];
 1949   ra_->dump_register(this, reg_str);
 1950   st->print("ADDI    %s, SP, %d \t// box node", reg_str, offset);
 1951 }
 1952 #endif
 1953 
 1954 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1955   C2_MacroAssembler _masm(&cbuf);
 1956 
 1957   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1958   int reg    = ra_->get_encode(this);
 1959 
 1960   if (Assembler::is_simm(offset, 16)) {
 1961     __ addi(as_Register(reg), R1_SP, offset);
 1962   } else {
 1963     ShouldNotReachHere();
 1964   }
 1965 }
 1966 
 1967 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1968   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
 1969   return 4;
 1970 }
 1971 
 1972 #ifndef PRODUCT
 1973 void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
 1974   st->print_cr("---- MachUEPNode ----");
 1975   st->print_cr("...");
 1976 }
 1977 #endif
 1978 
 1979 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1980   // This is the unverified entry point.
 1981   C2_MacroAssembler _masm(&cbuf);
 1982 
 1983   // Inline_cache contains a klass.
 1984   Register ic_klass       = as_Register(Matcher::inline_cache_reg_encode());
 1985   Register receiver_klass = R12_scratch2;  // tmp
 1986 
 1987   assert_different_registers(ic_klass, receiver_klass, R11_scratch1, R3_ARG1);
 1988   assert(R11_scratch1 == R11, "need prologue scratch register");
 1989 
 1990   // Check for NULL argument if we don't have implicit null checks.
 1991   if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
 1992     if (TrapBasedNullChecks) {
 1993       __ trap_null_check(R3_ARG1);
 1994     } else {
 1995       Label valid;
 1996       __ cmpdi(CCR0, R3_ARG1, 0);
 1997       __ bne_predict_taken(CCR0, valid);
 1998       // We have a null argument, branch to ic_miss_stub.
 1999       __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
 2000                            relocInfo::runtime_call_type);
 2001       __ bind(valid);
 2002     }
 2003   }
 2004   // Assume argument is not NULL, load klass from receiver.
 2005   __ load_klass(receiver_klass, R3_ARG1);
 2006 
 2007   if (TrapBasedICMissChecks) {
 2008     __ trap_ic_miss_check(receiver_klass, ic_klass);
 2009   } else {
 2010     Label valid;
 2011     __ cmpd(CCR0, receiver_klass, ic_klass);
 2012     __ beq_predict_taken(CCR0, valid);
 2013     // We have an unexpected klass, branch to ic_miss_stub.
 2014     __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
 2015                          relocInfo::runtime_call_type);
 2016     __ bind(valid);
 2017   }
 2018 
 2019   // Argument is valid and klass is as expected, continue.
 2020 }
 2021 
 2022 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 2023   // Variable size. Determine dynamically.
 2024   return MachNode::size(ra_);
 2025 }
 2026 
 2027 //=============================================================================
 2028 
 2029 %} // interrupt source
 2030 
 2031 source_hpp %{ // Header information of the source block.
 2032 
 2033 class HandlerImpl {
 2034 
 2035  public:
 2036 
 2037   static int emit_exception_handler(CodeBuffer &cbuf);
 2038   static int emit_deopt_handler(CodeBuffer& cbuf);
 2039 
 2040   static uint size_exception_handler() {
 2041     // The exception_handler is a b64_patchable.
 2042     return MacroAssembler::b64_patchable_size;
 2043   }
 2044 
 2045   static uint size_deopt_handler() {
 2046     // The deopt_handler is a bl64_patchable.
 2047     return MacroAssembler::bl64_patchable_size;
 2048   }
 2049 
 2050 };
 2051 
 2052 class Node::PD {
 2053 public:
 2054   enum NodeFlags {
 2055     _last_flag = Node::_last_flag
 2056   };
 2057 };
 2058 
 2059 %} // end source_hpp
 2060 
 2061 source %{
 2062 
 2063 int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
 2064   C2_MacroAssembler _masm(&cbuf);
 2065 
 2066   address base = __ start_a_stub(size_exception_handler());
 2067   if (base == NULL) return 0; // CodeBuffer::expand failed
 2068 
 2069   int offset = __ offset();
 2070   __ b64_patchable((address)OptoRuntime::exception_blob()->content_begin(),
 2071                        relocInfo::runtime_call_type);
 2072   assert(__ offset() - offset == (int)size_exception_handler(), "must be fixed size");
 2073   __ end_a_stub();
 2074 
 2075   return offset;
 2076 }
 2077 
 2078 // The deopt_handler is like the exception handler, but it calls to
 2079 // the deoptimization blob instead of jumping to the exception blob.
 2080 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
 2081   C2_MacroAssembler _masm(&cbuf);
 2082 
 2083   address base = __ start_a_stub(size_deopt_handler());
 2084   if (base == NULL) return 0; // CodeBuffer::expand failed
 2085 
 2086   int offset = __ offset();
 2087   __ bl64_patchable((address)SharedRuntime::deopt_blob()->unpack(),
 2088                         relocInfo::runtime_call_type);
 2089   assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
 2090   __ end_a_stub();
 2091 
 2092   return offset;
 2093 }
 2094 
 2095 //=============================================================================
 2096 
 2097 // Use a frame slots bias for frameless methods if accessing the stack.
 2098 static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) {
 2099   if (as_Register(reg_enc) == R1_SP) {
 2100     return 0; // TODO: PPC port ra_->C->frame_slots_sp_bias_in_bytes();
 2101   }
 2102   return 0;
 2103 }
 2104 
 2105 const bool Matcher::match_rule_supported(int opcode) {
 2106   if (!has_match_rule(opcode)) {
 2107     return false; // no match rule present
 2108   }
 2109 
 2110   switch (opcode) {
 2111     case Op_SqrtD:
 2112       return VM_Version::has_fsqrt();
 2113     case Op_RoundDoubleMode:
 2114       return VM_Version::has_vsx();
 2115     case Op_CountLeadingZerosI:
 2116     case Op_CountLeadingZerosL:
 2117       return UseCountLeadingZerosInstructionsPPC64;
 2118     case Op_CountTrailingZerosI:
 2119     case Op_CountTrailingZerosL:
 2120       return (UseCountLeadingZerosInstructionsPPC64 || UseCountTrailingZerosInstructionsPPC64);
 2121     case Op_PopCountI:
 2122     case Op_PopCountL:
 2123       return (UsePopCountInstruction && VM_Version::has_popcntw());
 2124 
 2125     case Op_AddVB:
 2126     case Op_AddVS:
 2127     case Op_AddVI:
 2128     case Op_AddVF:
 2129     case Op_AddVD:
 2130     case Op_SubVB:
 2131     case Op_SubVS:
 2132     case Op_SubVI:
 2133     case Op_SubVF:
 2134     case Op_SubVD:
 2135     case Op_MulVS:
 2136     case Op_MulVF:
 2137     case Op_MulVD:
 2138     case Op_DivVF:
 2139     case Op_DivVD:
 2140     case Op_AbsVF:
 2141     case Op_AbsVD:
 2142     case Op_NegVF:
 2143     case Op_NegVD:
 2144     case Op_SqrtVF:
 2145     case Op_SqrtVD:
 2146     case Op_AddVL:
 2147     case Op_SubVL:
 2148     case Op_MulVI:
 2149     case Op_RoundDoubleModeV:
 2150       return SuperwordUseVSX;
 2151     case Op_PopCountVI:
 2152       return (SuperwordUseVSX && UsePopCountInstruction);
 2153     case Op_FmaVF:
 2154     case Op_FmaVD:
 2155       return (SuperwordUseVSX && UseFMA);
 2156 
 2157     case Op_Digit:
 2158       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit);
 2159     case Op_LowerCase:
 2160       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase);
 2161     case Op_UpperCase:
 2162       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase);
 2163     case Op_Whitespace:
 2164       return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace);
 2165 
 2166     case Op_CacheWB:
 2167     case Op_CacheWBPreSync:
 2168     case Op_CacheWBPostSync:
 2169       return VM_Version::supports_data_cache_line_flush();
 2170   }
 2171 
 2172   return true; // By default, match rules are supported.
 2173 }
 2174 
 2175 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 2176   if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
 2177     return false;
 2178   }
 2179   return true; // By default, match rules are supported.
 2180 }
 2181 
 2182 const RegMask* Matcher::predicate_reg_mask(void) {
 2183   return NULL;
 2184 }
 2185 
 2186 const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
 2187   return NULL;
 2188 }
 2189 
 2190 // Vector calling convention not yet implemented.
 2191 const bool Matcher::supports_vector_calling_convention(void) {
 2192   return false;
 2193 }
 2194 
 2195 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2196   Unimplemented();
 2197   return OptoRegPair(0, 0);
 2198 }
 2199 
 2200 const int Matcher::float_pressure(int default_pressure_threshold) {
 2201   return default_pressure_threshold;
 2202 }
 2203 
 2204 // Vector width in bytes.
 2205 const int Matcher::vector_width_in_bytes(BasicType bt) {
 2206   if (SuperwordUseVSX) {
 2207     assert(MaxVectorSize == 16, "");
 2208     return 16;
 2209   } else {
 2210     assert(MaxVectorSize == 8, "");
 2211     return 8;
 2212   }
 2213 }
 2214 
 2215 // Vector ideal reg.
 2216 const uint Matcher::vector_ideal_reg(int size) {
 2217   if (SuperwordUseVSX) {
 2218     assert(MaxVectorSize == 16 && size == 16, "");
 2219     return Op_VecX;
 2220   } else {
 2221     assert(MaxVectorSize == 8 && size == 8, "");
 2222     return Op_RegL;
 2223   }
 2224 }
 2225 
 2226 // Limits on vector size (number of elements) loaded into vector.
 2227 const int Matcher::max_vector_size(const BasicType bt) {
 2228   assert(is_java_primitive(bt), "only primitive type vectors");
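        // E.g. with 16-byte VSX vectors: 16 byte elements, 8 shorts,
        // 4 ints/floats, or 2 longs/doubles per vector.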
 2229   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 2230 }
 2231 
 2232 const int Matcher::min_vector_size(const BasicType bt) {
 2233   return max_vector_size(bt); // Same as max.
 2234 }
 2235 
 2236 const int Matcher::scalable_vector_reg_size(const BasicType bt) {
 2237   return -1;
 2238 }
 2239 
 2240 // RETURNS: whether this branch offset is short enough that a short
 2241 // branch can be used.
 2242 //
 2243 // If the platform does not provide any short branch variants, then
 2244 // this method should return `false' for offset 0.
 2245 //
 2246 // `Compile::Fill_buffer' will decide on the basis of this information
 2247 // whether to do the pass `Compile::Shorten_branches' at all.
 2248 //
 2249 // And `Compile::Shorten_branches' will decide on the basis of this
 2250 // information whether to replace particular branch sites by short
 2251 // ones.
 2252 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2253   // Is the offset within the range of a ppc64 pc relative branch?
 2254   bool b;
 2255 
 2256   const int safety_zone = 3 * BytesPerInstWord;
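        // A conditional branch (bc) has a 14-bit BD field that is concatenated
        // with two zero bits, i.e. a signed 16-bit byte displacement:
        // 29 - 16 + 1 + 2 == 16.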
 2257   b = Assembler::is_simm((offset<0 ? offset-safety_zone : offset+safety_zone),
 2258                          29 - 16 + 1 + 2);
 2259   return b;
 2260 }
 2261 
 2262 /* TODO: PPC port
 2263 // Make a new machine dependent decode node (with its operands).
 2264 MachTypeNode *Matcher::make_decode_node() {
 2265   assert(CompressedOops::base() == NULL && CompressedOops::shift() == 0,
 2266          "This method is only implemented for unscaled cOops mode so far");
 2267   MachTypeNode *decode = new decodeN_unscaledNode();
 2268   decode->set_opnd_array(0, new iRegPdstOper());
 2269   decode->set_opnd_array(1, new iRegNsrcOper());
 2270   return decode;
 2271 }
 2272 */
 2273 
 2274 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) {
 2275   ShouldNotReachHere(); // generic vector operands not supported
 2276   return NULL;
 2277 }
 2278 
 2279 bool Matcher::is_generic_reg2reg_move(MachNode* m) {
 2280   ShouldNotReachHere();  // generic vector operands not supported
 2281   return false;
 2282 }
 2283 
 2284 bool Matcher::is_generic_vector(MachOper* opnd)  {
 2285   ShouldNotReachHere();  // generic vector operands not supported
 2286   return false;
 2287 }
 2288 
 2289 // Constants for c2c and c calling conventions.
 2290 
 2291 const MachRegisterNumbers iarg_reg[8] = {
 2292   R3_num, R4_num, R5_num, R6_num,
 2293   R7_num, R8_num, R9_num, R10_num
 2294 };
 2295 
 2296 const MachRegisterNumbers farg_reg[13] = {
 2297   F1_num, F2_num, F3_num, F4_num,
 2298   F5_num, F6_num, F7_num, F8_num,
 2299   F9_num, F10_num, F11_num, F12_num,
 2300   F13_num
 2301 };
 2302 
 2303 const MachRegisterNumbers vsarg_reg[64] = {
 2304   VSR0_num, VSR1_num, VSR2_num, VSR3_num,
 2305   VSR4_num, VSR5_num, VSR6_num, VSR7_num,
 2306   VSR8_num, VSR9_num, VSR10_num, VSR11_num,
 2307   VSR12_num, VSR13_num, VSR14_num, VSR15_num,
 2308   VSR16_num, VSR17_num, VSR18_num, VSR19_num,
 2309   VSR20_num, VSR21_num, VSR22_num, VSR23_num,
 2310   VSR24_num, VSR25_num, VSR26_num, VSR27_num,
 2311   VSR28_num, VSR29_num, VSR30_num, VSR31_num,
 2312   VSR32_num, VSR33_num, VSR34_num, VSR35_num,
 2313   VSR36_num, VSR37_num, VSR38_num, VSR39_num,
 2314   VSR40_num, VSR41_num, VSR42_num, VSR43_num,
 2315   VSR44_num, VSR45_num, VSR46_num, VSR47_num,
 2316   VSR48_num, VSR49_num, VSR50_num, VSR51_num,
 2317   VSR52_num, VSR53_num, VSR54_num, VSR55_num,
 2318   VSR56_num, VSR57_num, VSR58_num, VSR59_num,
 2319   VSR60_num, VSR61_num, VSR62_num, VSR63_num
 2320 };
 2321 
 2322 const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
 2323 
 2324 const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
 2325 
 2326 const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]);
 2327 
 2328 // Return whether or not this register is ever used as an argument. This
 2329 // function is used on startup to build the trampoline stubs in generateOptoStub.
 2330 // Registers not mentioned will be killed by the VM call in the trampoline, and
 2331 // arguments in those registers will not be available to the callee.
 2332 bool Matcher::can_be_java_arg(int reg) {
 2333   // We return true for all registers contained in iarg_reg[] and
 2334   // farg_reg[] and their virtual halves.
 2335   // We must include the virtual halves in order to get STDs and LDs
 2336   // instead of STWs and LWs in the trampoline stubs.
 2337 
 2338   if (   reg == R3_num  || reg == R3_H_num
 2339       || reg == R4_num  || reg == R4_H_num
 2340       || reg == R5_num  || reg == R5_H_num
 2341       || reg == R6_num  || reg == R6_H_num
 2342       || reg == R7_num  || reg == R7_H_num
 2343       || reg == R8_num  || reg == R8_H_num
 2344       || reg == R9_num  || reg == R9_H_num
 2345       || reg == R10_num || reg == R10_H_num)
 2346     return true;
 2347 
 2348   if (   reg == F1_num  || reg == F1_H_num
 2349       || reg == F2_num  || reg == F2_H_num
 2350       || reg == F3_num  || reg == F3_H_num
 2351       || reg == F4_num  || reg == F4_H_num
 2352       || reg == F5_num  || reg == F5_H_num
 2353       || reg == F6_num  || reg == F6_H_num
 2354       || reg == F7_num  || reg == F7_H_num
 2355       || reg == F8_num  || reg == F8_H_num
 2356       || reg == F9_num  || reg == F9_H_num
 2357       || reg == F10_num || reg == F10_H_num
 2358       || reg == F11_num || reg == F11_H_num
 2359       || reg == F12_num || reg == F12_H_num
 2360       || reg == F13_num || reg == F13_H_num)
 2361     return true;
 2362 
 2363   return false;
 2364 }
 2365 
 2366 bool Matcher::is_spillable_arg(int reg) {
 2367   return can_be_java_arg(reg);
 2368 }
 2369 
 2370 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
 2371   return false;
 2372 }
 2373 
 2374 // Register for DIVI projection of divmodI.
 2375 RegMask Matcher::divI_proj_mask() {
 2376   ShouldNotReachHere();
 2377   return RegMask();
 2378 }
 2379 
 2380 // Register for MODI projection of divmodI.
 2381 RegMask Matcher::modI_proj_mask() {
 2382   ShouldNotReachHere();
 2383   return RegMask();
 2384 }
 2385 
 2386 // Register for DIVL projection of divmodL.
 2387 RegMask Matcher::divL_proj_mask() {
 2388   ShouldNotReachHere();
 2389   return RegMask();
 2390 }
 2391 
 2392 // Register for MODL projection of divmodL.
 2393 RegMask Matcher::modL_proj_mask() {
 2394   ShouldNotReachHere();
 2395   return RegMask();
 2396 }
 2397 
 2398 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 2399   return RegMask();
 2400 }
 2401 
 2402 %}
 2403 
 2404 //----------ENCODING BLOCK-----------------------------------------------------
 2405 // This block specifies the encoding classes used by the compiler to output
 2406 // byte streams. Encoding classes are parameterized macros used by
 2407 // Machine Instruction Nodes in order to generate the bit encoding of the
 2408 // instruction. Operands specify their base encoding interface with the
 2409 // interface keyword. Four interfaces are currently supported:
 2410 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
 2411 // operand to generate a function which returns its register number when
 2412 // queried. CONST_INTER causes an operand to generate a function which
 2413 // returns the value of the constant when queried. MEMORY_INTER causes an
 2414 // operand to generate four functions which return the Base Register, the
 2415 // Index Register, the Scale Value, and the Offset Value of the operand when
 2416 // queried. COND_INTER causes an operand to generate six functions which
 2417 // return the encoding code (i.e. the encoding bits for the instruction)
 2418 // associated with each basic boolean condition for a conditional instruction.
 2419 //
 2420 // Instructions specify two basic values for encoding. (A function is also
 2421 // available to check whether a constant displacement is an oop.) They use the
 2422 // ins_encode keyword to specify their encoding classes (which must be
 2423 // a sequence of enc_class names, and their parameters, specified in
 2424 // the encoding block), and they use the
 2425 // opcode keyword to specify, in order, their primary, secondary, and
 2426 // tertiary opcode. Only the opcode sections which a particular instruction
 2427 // needs for encoding need to be specified.
 2428 encode %{
 2429   enc_class enc_unimplemented %{
 2430     C2_MacroAssembler _masm(&cbuf);
 2431     __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
 2432   %}
 2433 
 2434   enc_class enc_untested %{
 2435 #ifdef ASSERT
 2436     C2_MacroAssembler _masm(&cbuf);
 2437     __ untested("Untested mach node encoding in AD file.");
 2438 #else
 2439 #endif
 2440   %}
 2441 
 2442   enc_class enc_lbz(iRegIdst dst, memory mem) %{
 2443     C2_MacroAssembler _masm(&cbuf);
 2444     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2445     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
 2446   %}
 2447 
 2448   // Load acquire.
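        // The twi_0/isync pair below is the classic PowerPC load-acquire idiom:
        // the never-taken trap creates a dependency on the loaded value, and
        // isync keeps later memory accesses from being performed before it.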
 2449   enc_class enc_lbz_ac(iRegIdst dst, memory mem) %{
 2450     C2_MacroAssembler _masm(&cbuf);
 2451     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2452     __ lbz($dst$$Register, Idisp, $mem$$base$$Register);
 2453     __ twi_0($dst$$Register);
 2454     __ isync();
 2455   %}
 2456 
 2457   enc_class enc_lhz(iRegIdst dst, memory mem) %{
 2458 
 2459     C2_MacroAssembler _masm(&cbuf);
 2460     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2461     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
 2462   %}
 2463 
 2464   // Load acquire.
 2465   enc_class enc_lhz_ac(iRegIdst dst, memory mem) %{
 2466 
 2467     C2_MacroAssembler _masm(&cbuf);
 2468     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2469     __ lhz($dst$$Register, Idisp, $mem$$base$$Register);
 2470     __ twi_0($dst$$Register);
 2471     __ isync();
 2472   %}
 2473 
 2474   enc_class enc_lwz(iRegIdst dst, memory mem) %{
 2475 
 2476     C2_MacroAssembler _masm(&cbuf);
 2477     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2478     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
 2479   %}
 2480 
 2481   // Load acquire.
 2482   enc_class enc_lwz_ac(iRegIdst dst, memory mem) %{
 2483 
 2484     C2_MacroAssembler _masm(&cbuf);
 2485     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2486     __ lwz($dst$$Register, Idisp, $mem$$base$$Register);
 2487     __ twi_0($dst$$Register);
 2488     __ isync();
 2489   %}
 2490 
 2491   enc_class enc_ld(iRegLdst dst, memoryAlg4 mem) %{
 2492     C2_MacroAssembler _masm(&cbuf);
 2493     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2494     // Operand 'ds' requires 4-alignment.
 2495     assert((Idisp & 0x3) == 0, "unaligned offset");
 2496     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 2497   %}
 2498 
 2499   // Load acquire.
 2500   enc_class enc_ld_ac(iRegLdst dst, memoryAlg4 mem) %{
 2501     C2_MacroAssembler _masm(&cbuf);
 2502     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2503     // Operand 'ds' requires 4-alignment.
 2504     assert((Idisp & 0x3) == 0, "unaligned offset");
 2505     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 2506     __ twi_0($dst$$Register);
 2507     __ isync();
 2508   %}
 2509 
 2510   enc_class enc_lfd(RegF dst, memory mem) %{
 2511     C2_MacroAssembler _masm(&cbuf);
 2512     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2513     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 2514   %}
 2515 
 2516   enc_class enc_load_long_constL(iRegLdst dst, immL src, iRegLdst toc) %{
 2517 
 2518     C2_MacroAssembler _masm(&cbuf);
 2519     int toc_offset = 0;
 2520 
 2521     address const_toc_addr;
 2522     // Create a non-oop constant, no relocation needed.
 2523     // If it is an IC, it has a virtual_call_Relocation.
 2524     const_toc_addr = __ long_constant((jlong)$src$$constant);
 2525     if (const_toc_addr == NULL) {
 2526       ciEnv::current()->record_out_of_memory_failure();
 2527       return;
 2528     }
 2529 
 2530     // Get the constant's TOC offset.
 2531     toc_offset = __ offset_to_method_toc(const_toc_addr);
 2532 
 2533     // Keep the current instruction offset in mind.
 2534     ((loadConLNode*)this)->_cbuf_insts_offset = __ offset();
 2535 
 2536     __ ld($dst$$Register, toc_offset, $toc$$Register);
 2537   %}
 2538 
 2539   enc_class enc_load_long_constL_hi(iRegLdst dst, iRegLdst toc, immL src) %{
 2540 
 2541     C2_MacroAssembler _masm(&cbuf);
 2542 
 2543     if (!ra_->C->output()->in_scratch_emit_size()) {
 2544       address const_toc_addr;
 2545       // Create a non-oop constant, no relocation needed.
 2546       // If it is an IC, it has a virtual_call_Relocation.
 2547       const_toc_addr = __ long_constant((jlong)$src$$constant);
 2548       if (const_toc_addr == NULL) {
 2549         ciEnv::current()->record_out_of_memory_failure();
 2550         return;
 2551       }
 2552 
 2553       // Get the constant's TOC offset.
 2554       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
 2555       // Store the toc offset of the constant.
 2556       ((loadConL_hiNode*)this)->_const_toc_offset = toc_offset;
 2557 
 2558       // Also keep the current instruction offset in mind.
 2559       ((loadConL_hiNode*)this)->_cbuf_insts_offset = __ offset();
 2560     }
 2561 
 2562     __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
 2563   %}
 2564 
 2565 %} // encode
 2566 
 2567 source %{
 2568 
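      // Result of expanding a long-constant load from the method's TOC:
      // either a single ld (small constant pool) or an addis/ld pair
      // (large constant pool). _last is the node producing the final value.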
 2569 typedef struct {
 2570   loadConL_hiNode *_large_hi;
 2571   loadConL_loNode *_large_lo;
 2572   loadConLNode    *_small;
 2573   MachNode        *_last;
 2574 } loadConLNodesTuple;
 2575 
 2576 loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
 2577                                              OptoReg::Name reg_second, OptoReg::Name reg_first) {
 2578   loadConLNodesTuple nodes;
 2579 
 2580   const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2581   if (large_constant_pool) {
 2582     // Create new nodes.
 2583     loadConL_hiNode *m1 = new loadConL_hiNode();
 2584     loadConL_loNode *m2 = new loadConL_loNode();
 2585 
 2586     // inputs for new nodes
 2587     m1->add_req(NULL, toc);
 2588     m2->add_req(NULL, m1);
 2589 
 2590     // operands for new nodes
 2591     m1->_opnds[0] = new iRegLdstOper(); // dst
 2592     m1->_opnds[1] = immSrc;             // src
 2593     m1->_opnds[2] = new iRegPdstOper(); // toc
 2594     m2->_opnds[0] = new iRegLdstOper(); // dst
 2595     m2->_opnds[1] = immSrc;             // src
 2596     m2->_opnds[2] = new iRegLdstOper(); // base
 2597 
 2598     // Initialize ins_attrib TOC fields.
 2599     m1->_const_toc_offset = -1;
 2600     m2->_const_toc_offset_hi_node = m1;
 2601 
 2602     // Initialize ins_attrib instruction offset.
 2603     m1->_cbuf_insts_offset = -1;
 2604 
 2605     // register allocation for new nodes
 2606     ra_->set_pair(m1->_idx, reg_second, reg_first);
 2607     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2608 
 2609     // Create result.
 2610     nodes._large_hi = m1;
 2611     nodes._large_lo = m2;
 2612     nodes._small = NULL;
 2613     nodes._last = nodes._large_lo;
 2614     assert(m2->bottom_type()->isa_long(), "must be long");
 2615   } else {
 2616     loadConLNode *m2 = new loadConLNode();
 2617 
 2618     // inputs for new nodes
 2619     m2->add_req(NULL, toc);
 2620 
 2621     // operands for new nodes
 2622     m2->_opnds[0] = new iRegLdstOper(); // dst
 2623     m2->_opnds[1] = immSrc;             // src
 2624     m2->_opnds[2] = new iRegPdstOper(); // toc
 2625 
 2626     // Initialize ins_attrib instruction offset.
 2627     m2->_cbuf_insts_offset = -1;
 2628 
 2629     // register allocation for new nodes
 2630     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2631 
 2632     // Create result.
 2633     nodes._large_hi = NULL;
 2634     nodes._large_lo = NULL;
 2635     nodes._small = m2;
 2636     nodes._last = nodes._small;
 2637     assert(m2->bottom_type()->isa_long(), "must be long");
 2638   }
 2639 
 2640   return nodes;
 2641 }
 2642 
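      // Like loadConLNodesTuple, plus the nodes that move the loaded long
      // into a VSR (mtvsrd) and replicate it into both doublewords (xxspltd).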
 2643 typedef struct {
 2644   loadConL_hiNode *_large_hi;
 2645   loadConL_loNode *_large_lo;
 2646   mtvsrdNode      *_moved;
 2647   xxspltdNode     *_replicated;
 2648   loadConLNode    *_small;
 2649   MachNode        *_last;
 2650 } loadConLReplicatedNodesTuple;
 2651 
 2652 loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
 2653                                                  vecXOper *dst, immI_0Oper *zero,
 2654                                                  OptoReg::Name reg_second, OptoReg::Name reg_first,
 2655                                                  OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
 2656   loadConLReplicatedNodesTuple nodes;
 2657 
 2658   const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2659   if (large_constant_pool) {
 2660     // Create new nodes.
 2661     loadConL_hiNode *m1 = new  loadConL_hiNode();
 2662     loadConL_loNode *m2 = new  loadConL_loNode();
 2663     mtvsrdNode *m3 = new  mtvsrdNode();
 2664     xxspltdNode *m4 = new  xxspltdNode();
 2665 
 2666     // inputs for new nodes
 2667     m1->add_req(NULL, toc);
 2668     m2->add_req(NULL, m1);
 2669     m3->add_req(NULL, m2);
 2670     m4->add_req(NULL, m3);
 2671 
 2672     // operands for new nodes
 2673     m1->_opnds[0] = new  iRegLdstOper(); // dst
 2674     m1->_opnds[1] = immSrc;              // src
 2675     m1->_opnds[2] = new  iRegPdstOper(); // toc
 2676 
 2677     m2->_opnds[0] = new  iRegLdstOper(); // dst
 2678     m2->_opnds[1] = immSrc;              // src
 2679     m2->_opnds[2] = new  iRegLdstOper(); // base
 2680 
 2681     m3->_opnds[0] = new  vecXOper();     // dst
 2682     m3->_opnds[1] = new  iRegLdstOper(); // src
 2683 
 2684     m4->_opnds[0] = new  vecXOper();     // dst
 2685     m4->_opnds[1] = new  vecXOper();     // src
 2686     m4->_opnds[2] = zero;
 2687 
 2688     // Initialize ins_attrib TOC fields.
 2689     m1->_const_toc_offset = -1;
 2690     m2->_const_toc_offset_hi_node = m1;
 2691 
 2692     // Initialize ins_attrib instruction offset.
 2693     m1->_cbuf_insts_offset = -1;
 2694 
 2695     // register allocation for new nodes
 2696     ra_->set_pair(m1->_idx, reg_second, reg_first);
 2697     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2698     ra_->set1(m3->_idx, reg_second);
 2699     ra_->set2(m3->_idx, reg_vec_first);
 2700     ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
 2701 
 2702     // Create result.
 2703     nodes._large_hi = m1;
 2704     nodes._large_lo = m2;
 2705     nodes._moved = m3;
 2706     nodes._replicated = m4;
 2707     nodes._small = NULL;
 2708     nodes._last = nodes._replicated;
 2709     assert(m2->bottom_type()->isa_long(), "must be long");
 2710   } else {
 2711     loadConLNode *m2 = new  loadConLNode();
 2712     mtvsrdNode *m3 = new  mtvsrdNode();
 2713     xxspltdNode *m4 = new  xxspltdNode();
 2714 
 2715     // inputs for new nodes
 2716     m2->add_req(NULL, toc);
 2717 
 2718     // operands for new nodes
 2719     m2->_opnds[0] = new  iRegLdstOper(); // dst
 2720     m2->_opnds[1] = immSrc;              // src
 2721     m2->_opnds[2] = new  iRegPdstOper(); // toc
 2722 
 2723     m3->_opnds[0] = new  vecXOper();     // dst
 2724     m3->_opnds[1] = new  iRegLdstOper(); // src
 2725 
 2726     m4->_opnds[0] = new  vecXOper();     // dst
 2727     m4->_opnds[1] = new  vecXOper();     // src
 2728     m4->_opnds[2] = zero;
 2729 
 2730     // Initialize ins_attrib instruction offset.
 2731     m2->_cbuf_insts_offset = -1;
 2732     ra_->set1(m3->_idx, reg_second);
 2733     ra_->set2(m3->_idx, reg_vec_first);
 2734     ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
 2735 
 2736     // register allocation for new nodes
 2737     ra_->set_pair(m2->_idx, reg_second, reg_first);
 2738 
 2739     // Create result.
 2740     nodes._large_hi = NULL;
 2741     nodes._large_lo = NULL;
 2742     nodes._small = m2;
 2743     nodes._moved = m3;
 2744     nodes._replicated = m4;
 2745     nodes._last = nodes._replicated;
 2746     assert(m2->bottom_type()->isa_long(), "must be long");
 2747   }
 2748 
 2749   return nodes;
 2750 }
 2751 
 2752 %} // source
 2753 
 2754 encode %{
 2755   // Postalloc expand emitter for loading a long constant from the method's TOC.
 2756   // Enc_class needed as constanttablebase is not supported by postalloc
 2757   // expand.
 2758   enc_class postalloc_expand_load_long_constant(iRegLdst dst, immL src, iRegLdst toc) %{
 2759     // Create new nodes.
 2760     loadConLNodesTuple loadConLNodes =
 2761       loadConLNodesTuple_create(ra_, n_toc, op_src,
 2762                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 2763 
 2764     // Push new nodes.
 2765     if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
 2766     if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
 2767 
 2768     // some asserts
 2769     assert(nodes->length() >= 1, "must have created at least 1 node");
 2770     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
 2771   %}
 2772 
 2773   enc_class enc_load_long_constP(iRegLdst dst, immP src, iRegLdst toc) %{
 2774 
 2775     C2_MacroAssembler _masm(&cbuf);
 2776     int toc_offset = 0;
 2777 
 2778     intptr_t val = $src$$constant;
 2779     relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
 2780     address const_toc_addr;
 2781     if (constant_reloc == relocInfo::oop_type) {
 2782       // Create an oop constant and a corresponding relocation.
 2783       AddressLiteral a = __ allocate_oop_address((jobject)val);
 2784       const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2785       __ relocate(a.rspec());
 2786     } else if (constant_reloc == relocInfo::metadata_type) {
 2787       AddressLiteral a = __ constant_metadata_address((Metadata *)val);
 2788       const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2789       __ relocate(a.rspec());
 2790     } else {
 2791       // Create a non-oop constant, no relocation needed.
 2792       const_toc_addr = __ long_constant((jlong)$src$$constant);
 2793     }
 2794 
 2795     if (const_toc_addr == NULL) {
 2796       ciEnv::current()->record_out_of_memory_failure();
 2797       return;
 2798     }
 2799     // Get the constant's TOC offset.
 2800     toc_offset = __ offset_to_method_toc(const_toc_addr);
 2801 
 2802     __ ld($dst$$Register, toc_offset, $toc$$Register);
 2803   %}
 2804 
 2805   enc_class enc_load_long_constP_hi(iRegLdst dst, immP src, iRegLdst toc) %{
 2806 
 2807     C2_MacroAssembler _masm(&cbuf);
 2808     if (!ra_->C->output()->in_scratch_emit_size()) {
 2809       intptr_t val = $src$$constant;
 2810       relocInfo::relocType constant_reloc = $src->constant_reloc();  // src
 2811       address const_toc_addr;
 2812       if (constant_reloc == relocInfo::oop_type) {
 2813         // Create an oop constant and a corresponding relocation.
 2814         AddressLiteral a = __ allocate_oop_address((jobject)val);
 2815         const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2816         __ relocate(a.rspec());
 2817       } else if (constant_reloc == relocInfo::metadata_type) {
 2818         AddressLiteral a = __ constant_metadata_address((Metadata *)val);
 2819         const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none);
 2820         __ relocate(a.rspec());
 2821       } else {  // non-oop pointers, e.g. card mark base, heap top
 2822         // Create a non-oop constant, no relocation needed.
 2823         const_toc_addr = __ long_constant((jlong)$src$$constant);
 2824       }
 2825 
 2826       if (const_toc_addr == NULL) {
 2827         ciEnv::current()->record_out_of_memory_failure();
 2828         return;
 2829       }
 2830       // Get the constant's TOC offset.
 2831       const int toc_offset = __ offset_to_method_toc(const_toc_addr);
 2832       // Store the toc offset of the constant.
 2833       ((loadConP_hiNode*)this)->_const_toc_offset = toc_offset;
 2834     }
 2835 
 2836     __ addis($dst$$Register, $toc$$Register, MacroAssembler::largeoffset_si16_si16_hi(_const_toc_offset));
 2837   %}
 2838 
 2839   // Postalloc expand emitter for loading a ptr constant from the method's TOC.
 2840   // Enc_class needed as constanttablebase is not supported by postalloc
 2841   // expand.
 2842   enc_class postalloc_expand_load_ptr_constant(iRegPdst dst, immP src, iRegLdst toc) %{
 2843     const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2844     if (large_constant_pool) {
 2845       // Create new nodes.
 2846       loadConP_hiNode *m1 = new loadConP_hiNode();
 2847       loadConP_loNode *m2 = new loadConP_loNode();
 2848 
 2849       // inputs for new nodes
 2850       m1->add_req(NULL, n_toc);
 2851       m2->add_req(NULL, m1);
 2852 
 2853       // operands for new nodes
 2854       m1->_opnds[0] = new iRegPdstOper(); // dst
 2855       m1->_opnds[1] = op_src;             // src
 2856       m1->_opnds[2] = new iRegPdstOper(); // toc
 2857       m2->_opnds[0] = new iRegPdstOper(); // dst
 2858       m2->_opnds[1] = op_src;             // src
 2859       m2->_opnds[2] = new iRegLdstOper(); // base
 2860 
 2861       // Initialize ins_attrib TOC fields.
 2862       m1->_const_toc_offset = -1;
 2863       m2->_const_toc_offset_hi_node = m1;
 2864 
 2865       // Register allocation for new nodes.
 2866       ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2867       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2868 
 2869       nodes->push(m1);
 2870       nodes->push(m2);
 2871       assert(m2->bottom_type()->isa_ptr(), "must be ptr");
 2872     } else {
 2873       loadConPNode *m2 = new loadConPNode();
 2874 
 2875       // inputs for new nodes
 2876       m2->add_req(NULL, n_toc);
 2877 
 2878       // operands for new nodes
 2879       m2->_opnds[0] = new iRegPdstOper(); // dst
 2880       m2->_opnds[1] = op_src;             // src
 2881       m2->_opnds[2] = new iRegPdstOper(); // toc
 2882 
 2883       // Register allocation for new nodes.
 2884       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2885 
 2886       nodes->push(m2);
 2887       assert(m2->bottom_type()->isa_ptr(), "must be ptr");
 2888     }
 2889   %}
 2890 
 2891   // Enc_class needed as constanttablebase is not supported by postalloc
 2892   // expand.
 2893   enc_class postalloc_expand_load_float_constant(regF dst, immF src, iRegLdst toc) %{
 2894     bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2895 
 2896     MachNode *m2;
 2897     if (large_constant_pool) {
 2898       m2 = new loadConFCompNode();
 2899     } else {
 2900       m2 = new loadConFNode();
 2901     }
 2902     // inputs for new nodes
 2903     m2->add_req(NULL, n_toc);
 2904 
 2905     // operands for new nodes
 2906     m2->_opnds[0] = op_dst;
 2907     m2->_opnds[1] = op_src;
 2908     m2->_opnds[2] = new iRegPdstOper(); // constanttablebase
 2909 
 2910     // register allocation for new nodes
 2911     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2912     nodes->push(m2);
 2913   %}
 2914 
 2915   // Enc_class needed as constanttablebase is not supported by postalloc
 2916   // expand.
 2917   enc_class postalloc_expand_load_double_constant(regD dst, immD src, iRegLdst toc) %{
 2918     bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
 2919 
 2920     MachNode *m2;
 2921     if (large_constant_pool) {
 2922       m2 = new loadConDCompNode();
 2923     } else {
 2924       m2 = new loadConDNode();
 2925     }
 2926     // inputs for new nodes
 2927     m2->add_req(NULL, n_toc);
 2928 
 2929     // operands for new nodes
 2930     m2->_opnds[0] = op_dst;
 2931     m2->_opnds[1] = op_src;
 2932     m2->_opnds[2] = new iRegPdstOper(); // constanttablebase
 2933 
 2934     // register allocation for new nodes
 2935     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2936     nodes->push(m2);
 2937   %}
 2938 
 2939   enc_class enc_stw(iRegIsrc src, memory mem) %{
 2940     C2_MacroAssembler _masm(&cbuf);
 2941     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2942     __ stw($src$$Register, Idisp, $mem$$base$$Register);
 2943   %}
 2944 
 2945   enc_class enc_std(iRegIsrc src, memoryAlg4 mem) %{
 2946     C2_MacroAssembler _masm(&cbuf);
 2947     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
    // The 'ds' operand (DS-form displacement) requires 4-byte alignment.
 2949     assert((Idisp & 0x3) == 0, "unaligned offset");
 2950     __ std($src$$Register, Idisp, $mem$$base$$Register);
 2951   %}
 2952 
 2953   enc_class enc_stfs(RegF src, memory mem) %{
 2954     C2_MacroAssembler _masm(&cbuf);
 2955     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2956     __ stfs($src$$FloatRegister, Idisp, $mem$$base$$Register);
 2957   %}
 2958 
 2959   enc_class enc_stfd(RegF src, memory mem) %{
 2960     C2_MacroAssembler _masm(&cbuf);
 2961     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 2962     __ stfd($src$$FloatRegister, Idisp, $mem$$base$$Register);
 2963   %}
 2964 
 2965   enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{
 2966 
 2967     if (VM_Version::has_isel()) {
      // Use the isel instruction, available with Power 7.
 2969       cmpP_reg_imm16Node *n_compare  = new cmpP_reg_imm16Node();
 2970       encodeP_subNode    *n_sub_base = new encodeP_subNode();
 2971       encodeP_shiftNode  *n_shift    = new encodeP_shiftNode();
 2972       cond_set_0_oopNode *n_cond_set = new cond_set_0_oopNode();
 2973 
 2974       n_compare->add_req(n_region, n_src);
 2975       n_compare->_opnds[0] = op_crx;
 2976       n_compare->_opnds[1] = op_src;
 2977       n_compare->_opnds[2] = new immL16Oper(0);
 2978 
 2979       n_sub_base->add_req(n_region, n_src);
 2980       n_sub_base->_opnds[0] = op_dst;
 2981       n_sub_base->_opnds[1] = op_src;
 2982       n_sub_base->_bottom_type = _bottom_type;
 2983 
 2984       n_shift->add_req(n_region, n_sub_base);
 2985       n_shift->_opnds[0] = op_dst;
 2986       n_shift->_opnds[1] = op_dst;
 2987       n_shift->_bottom_type = _bottom_type;
 2988 
 2989       n_cond_set->add_req(n_region, n_compare, n_shift);
 2990       n_cond_set->_opnds[0] = op_dst;
 2991       n_cond_set->_opnds[1] = op_crx;
 2992       n_cond_set->_opnds[2] = op_dst;
 2993       n_cond_set->_bottom_type = _bottom_type;
 2994 
 2995       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 2996       ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2997       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2998       ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 2999 
 3000       nodes->push(n_compare);
 3001       nodes->push(n_sub_base);
 3002       nodes->push(n_shift);
 3003       nodes->push(n_cond_set);
 3004 
 3005     } else {
 3006       // before Power 7
 3007       moveRegNode        *n_move     = new moveRegNode();
 3008       cmpP_reg_imm16Node *n_compare  = new cmpP_reg_imm16Node();
 3009       encodeP_shiftNode  *n_shift    = new encodeP_shiftNode();
 3010       cond_sub_baseNode  *n_sub_base = new cond_sub_baseNode();
 3011 
 3012       n_move->add_req(n_region, n_src);
 3013       n_move->_opnds[0] = op_dst;
 3014       n_move->_opnds[1] = op_src;
 3015       ra_->set_oop(n_move, true); // Until here, 'n_move' still produces an oop.
 3016 
 3017       n_compare->add_req(n_region, n_src);
 3018       n_compare->add_prec(n_move);
 3019 
 3020       n_compare->_opnds[0] = op_crx;
 3021       n_compare->_opnds[1] = op_src;
 3022       n_compare->_opnds[2] = new immL16Oper(0);
 3023 
 3024       n_sub_base->add_req(n_region, n_compare, n_src);
 3025       n_sub_base->_opnds[0] = op_dst;
 3026       n_sub_base->_opnds[1] = op_crx;
 3027       n_sub_base->_opnds[2] = op_src;
 3028       n_sub_base->_bottom_type = _bottom_type;
 3029 
 3030       n_shift->add_req(n_region, n_sub_base);
 3031       n_shift->_opnds[0] = op_dst;
 3032       n_shift->_opnds[1] = op_dst;
 3033       n_shift->_bottom_type = _bottom_type;
 3034 
 3035       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3036       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3037       ra_->set_pair(n_sub_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3038       ra_->set_pair(n_move->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3039 
 3040       nodes->push(n_move);
 3041       nodes->push(n_compare);
 3042       nodes->push(n_sub_base);
 3043       nodes->push(n_shift);
 3044     }
 3045 
 3046     assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
 3047   %}
 3048 
 3049   enc_class postalloc_expand_encode_oop_not_null(iRegNdst dst, iRegPdst src) %{
 3050 
 3051     encodeP_subNode *n1 = new encodeP_subNode();
 3052     n1->add_req(n_region, n_src);
 3053     n1->_opnds[0] = op_dst;
 3054     n1->_opnds[1] = op_src;
 3055     n1->_bottom_type = _bottom_type;
 3056 
 3057     encodeP_shiftNode *n2 = new encodeP_shiftNode();
 3058     n2->add_req(n_region, n1);
 3059     n2->_opnds[0] = op_dst;
 3060     n2->_opnds[1] = op_dst;
 3061     n2->_bottom_type = _bottom_type;
 3062     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3063     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3064 
 3065     nodes->push(n1);
 3066     nodes->push(n2);
 3067     assert(!(ra_->is_oop(this)), "sanity"); // This is not supposed to be GC'ed.
 3068   %}
 3069 
 3070   enc_class postalloc_expand_decode_oop(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 3071     decodeN_shiftNode *n_shift    = new decodeN_shiftNode();
 3072     cmpN_reg_imm0Node *n_compare  = new cmpN_reg_imm0Node();
 3073 
 3074     n_compare->add_req(n_region, n_src);
 3075     n_compare->_opnds[0] = op_crx;
 3076     n_compare->_opnds[1] = op_src;
 3077     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
 3078 
 3079     n_shift->add_req(n_region, n_src);
 3080     n_shift->_opnds[0] = op_dst;
 3081     n_shift->_opnds[1] = op_src;
 3082     n_shift->_bottom_type = _bottom_type;
 3083 
 3084     if (VM_Version::has_isel()) {
      // Use the isel instruction, available with Power 7.
 3086 
 3087       decodeN_addNode *n_add_base = new decodeN_addNode();
 3088       n_add_base->add_req(n_region, n_shift);
 3089       n_add_base->_opnds[0] = op_dst;
 3090       n_add_base->_opnds[1] = op_dst;
 3091       n_add_base->_bottom_type = _bottom_type;
 3092 
 3093       cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode();
 3094       n_cond_set->add_req(n_region, n_compare, n_add_base);
 3095       n_cond_set->_opnds[0] = op_dst;
 3096       n_cond_set->_opnds[1] = op_crx;
 3097       n_cond_set->_opnds[2] = op_dst;
 3098       n_cond_set->_bottom_type = _bottom_type;
 3099 
 3100       assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3101       ra_->set_oop(n_cond_set, true);
 3102 
 3103       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3104       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3105       ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3106       ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3107 
 3108       nodes->push(n_compare);
 3109       nodes->push(n_shift);
 3110       nodes->push(n_add_base);
 3111       nodes->push(n_cond_set);
 3112 
 3113     } else {
 3114       // before Power 7
 3115       cond_add_baseNode *n_add_base = new cond_add_baseNode();
 3116 
 3117       n_add_base->add_req(n_region, n_compare, n_shift);
 3118       n_add_base->_opnds[0] = op_dst;
 3119       n_add_base->_opnds[1] = op_crx;
 3120       n_add_base->_opnds[2] = op_dst;
 3121       n_add_base->_bottom_type = _bottom_type;
 3122 
 3123       assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3124       ra_->set_oop(n_add_base, true);
 3125 
 3126       ra_->set_pair(n_shift->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3127       ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 3128       ra_->set_pair(n_add_base->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3129 
 3130       nodes->push(n_compare);
 3131       nodes->push(n_shift);
 3132       nodes->push(n_add_base);
 3133     }
 3134   %}
 3135 
 3136   enc_class postalloc_expand_decode_oop_not_null(iRegPdst dst, iRegNsrc src) %{
 3137     decodeN_shiftNode *n1 = new decodeN_shiftNode();
 3138     n1->add_req(n_region, n_src);
 3139     n1->_opnds[0] = op_dst;
 3140     n1->_opnds[1] = op_src;
 3141     n1->_bottom_type = _bottom_type;
 3142 
 3143     decodeN_addNode *n2 = new decodeN_addNode();
 3144     n2->add_req(n_region, n1);
 3145     n2->_opnds[0] = op_dst;
 3146     n2->_opnds[1] = op_dst;
 3147     n2->_bottom_type = _bottom_type;
 3148     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3149     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 3150 
 3151     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 3152     ra_->set_oop(n2, true);
 3153 
 3154     nodes->push(n1);
 3155     nodes->push(n2);
 3156   %}
 3157 
 3158   enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
 3159 
 3160     C2_MacroAssembler _masm(&cbuf);
 3161     int cc        = $cmp$$cmpcode;
 3162     int flags_reg = $crx$$reg;
 3163     Label done;
 3164     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3165     // Branch if not (cmp crx).
 3166     __ bc(cc_to_inverse_boint(cc), cc_to_biint(cc, flags_reg), done);
 3167     __ mr($dst$$Register, $src$$Register);
 3168     __ bind(done);
 3169   %}
 3170 
 3171   enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
 3172 
 3173     C2_MacroAssembler _masm(&cbuf);
 3174     Label done;
 3175     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3176     // Branch if not (cmp crx).
 3177     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 3178     __ li($dst$$Register, $src$$constant);
 3179     __ bind(done);
 3180   %}
 3181 
  // This enc_class is needed so that the scheduler gets the proper
  // input mapping for latency computation.
 3184   enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 3185     C2_MacroAssembler _masm(&cbuf);
 3186     __ andc($dst$$Register, $src1$$Register, $src2$$Register);
 3187   %}
 3188 
 3189   enc_class enc_convI2B_regI__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
 3190 
 3191     C2_MacroAssembler _masm(&cbuf);
 3192 
 3193     Label done;
 3194     __ cmpwi($crx$$CondRegister, $src$$Register, 0);
 3195     __ li($dst$$Register, $zero$$constant);
 3196     __ beq($crx$$CondRegister, done);
 3197     __ li($dst$$Register, $notzero$$constant);
 3198     __ bind(done);
 3199   %}
 3200 
 3201   enc_class enc_convP2B_regP__cmove(iRegIdst dst, iRegPsrc src, flagsReg crx, immI16 zero, immI16 notzero) %{
 3202 
 3203     C2_MacroAssembler _masm(&cbuf);
 3204 
 3205     Label done;
 3206     __ cmpdi($crx$$CondRegister, $src$$Register, 0);
 3207     __ li($dst$$Register, $zero$$constant);
 3208     __ beq($crx$$CondRegister, done);
 3209     __ li($dst$$Register, $notzero$$constant);
 3210     __ bind(done);
 3211   %}
 3212 
 3213   enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
 3214 
 3215     C2_MacroAssembler _masm(&cbuf);
 3216     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 3217     Label done;
 3218     __ bso($crx$$CondRegister, done);
 3219     __ ld($dst$$Register, Idisp, $mem$$base$$Register);
 3220     __ bind(done);
 3221   %}
 3222 
 3223   enc_class enc_cmove_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
 3224 
 3225     C2_MacroAssembler _masm(&cbuf);
 3226     Label done;
 3227     __ bso($crx$$CondRegister, done);
 3228     __ mffprd($dst$$Register, $src$$FloatRegister);
 3229     __ bind(done);
 3230   %}
 3231 
 3232   enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
 3233 
 3234     C2_MacroAssembler _masm(&cbuf);
 3235     Label d;   // dummy
 3236     __ bind(d);
 3237     Label* p = ($lbl$$label);
 3238     // `p' is `NULL' when this encoding class is used only to
 3239     // determine the size of the encoded instruction.
 3240     Label& l = (NULL == p)? d : *(p);
 3241     int cc = $cmp$$cmpcode;
 3242     int flags_reg = $crx$$reg;
 3243     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 3244     int bhint = Assembler::bhintNoHint;
 3245 
 3246     if (UseStaticBranchPredictionForUncommonPathsPPC64) {
 3247       if (_prob <= PROB_NEVER) {
 3248         bhint = Assembler::bhintIsNotTaken;
 3249       } else if (_prob >= PROB_ALWAYS) {
 3250         bhint = Assembler::bhintIsTaken;
 3251       }
 3252     }
 3253 
 3254     __ bc(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
 3255           cc_to_biint(cc, flags_reg),
 3256           l);
 3257   %}
 3258 
 3259   enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
 3260     // The scheduler doesn't know about branch shortening, so we set the opcode
 3261     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
 3262 
 3263     C2_MacroAssembler _masm(&cbuf);
 3264     Label d;    // dummy
 3265     __ bind(d);
 3266     Label* p = ($lbl$$label);
 3267     // `p' is `NULL' when this encoding class is used only to
 3268     // determine the size of the encoded instruction.
 3269     Label& l = (NULL == p)? d : *(p);
 3270     int cc = $cmp$$cmpcode;
 3271     int flags_reg = $crx$$reg;
 3272     int bhint = Assembler::bhintNoHint;
 3273 
 3274     if (UseStaticBranchPredictionForUncommonPathsPPC64) {
 3275       if (_prob <= PROB_NEVER) {
 3276         bhint = Assembler::bhintIsNotTaken;
 3277       } else if (_prob >= PROB_ALWAYS) {
 3278         bhint = Assembler::bhintIsTaken;
 3279       }
 3280     }
 3281 
 3282     // Tell the conditional far branch to optimize itself when being relocated.
 3283     __ bc_far(Assembler::add_bhint_to_boint(bhint, cc_to_boint(cc)),
 3284                   cc_to_biint(cc, flags_reg),
 3285                   l,
 3286                   MacroAssembler::bc_far_optimize_on_relocate);
 3287   %}
 3288 
  // Postalloc expand emitter for loading a replicated float constant from
  // the method's TOC.
  // An enc_class is needed because constanttablebase is not supported by
  // postalloc expand.
 3293   enc_class postalloc_expand_load_replF_constant(iRegLdst dst, immF src, iRegLdst toc) %{
 3294     // Create new nodes.
 3295 
 3296     // Make an operand with the bit pattern to load as float.
 3297     immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
 3298 
 3299     loadConLNodesTuple loadConLNodes =
 3300       loadConLNodesTuple_create(ra_, n_toc, op_repl,
 3301                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 3302 
 3303     // Push new nodes.
 3304     if (loadConLNodes._large_hi) nodes->push(loadConLNodes._large_hi);
 3305     if (loadConLNodes._last)     nodes->push(loadConLNodes._last);
 3306 
 3307     assert(nodes->length() >= 1, "must have created at least 1 node");
 3308     assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
 3309   %}
 3310 
 3311   enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc, iRegLdst tmp) %{
 3312     // Create new nodes.
 3313 
 3314     // Make an operand with the bit pattern to load as float.
    immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
    immI_0Oper *op_zero = new immI_0Oper(0);
 3317 
 3318     loadConLReplicatedNodesTuple loadConLNodes =
 3319       loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
 3320                                 ra_->get_reg_second(n_tmp), ra_->get_reg_first(n_tmp),
 3321                                 ra_->get_reg_second(this), ra_->get_reg_first(this));
 3322 
 3323     // Push new nodes.
 3324     if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
 3325     if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
 3326     if (loadConLNodes._moved)    { nodes->push(loadConLNodes._moved); }
 3327     if (loadConLNodes._last)     { nodes->push(loadConLNodes._last); }
 3328 
 3329     assert(nodes->length() >= 1, "must have created at least 1 node");
 3330   %}
 3331 
  // This enc_class is needed so that the scheduler gets the proper
  // input mapping for latency computation.
 3334   enc_class enc_poll(immI dst, iRegLdst poll) %{
 3335     // Fake operand dst needed for PPC scheduler.
 3336     assert($dst$$constant == 0x0, "dst must be 0x0");
 3337 
 3338     C2_MacroAssembler _masm(&cbuf);
 3339     // Mark the code position where the load from the safepoint
 3340     // polling page was emitted as relocInfo::poll_type.
 3341     __ relocate(relocInfo::poll_type);
 3342     __ load_from_polling_page($poll$$Register);
 3343   %}
 3344 
 3345   // A Java static call or a runtime call.
 3346   //
 3347   // Branch-and-link relative to a trampoline.
 3348   // The trampoline loads the target address and does a long branch to there.
  // In case we call Java, the trampoline branches to an interpreter_stub
 3350   // which loads the inline cache and the real call target from the constant pool.
 3351   //
 3352   // This basically looks like this:
 3353   //
 3354   // >>>> consts      -+  -+
 3355   //                   |   |- offset1
 3356   // [call target1]    | <-+
 3357   // [IC cache]        |- offset2
 3358   // [call target2] <--+
 3359   //
 3360   // <<<< consts
 3361   // >>>> insts
 3362   //
 3363   // bl offset16               -+  -+             ??? // How many bits available?
 3364   //                            |   |
 3365   // <<<< insts                 |   |
 3366   // >>>> stubs                 |   |
 3367   //                            |   |- trampoline_stub_Reloc
 3368   // trampoline stub:           | <-+
 3369   //   r2 = toc                 |
 3370   //   r2 = [r2 + offset1]      |       // Load call target1 from const section
 3371   //   mtctr r2                 |
 3372   //   bctr                     |- static_stub_Reloc
 3373   // comp_to_interp_stub:   <---+
 3374   //   r1 = toc
 3375   //   ICreg = [r1 + IC_offset]         // Load IC from const section
 3376   //   r1    = [r1 + offset2]           // Load call target2 from const section
 3377   //   mtctr r1
 3378   //   bctr
 3379   //
 3380   // <<<< stubs
 3381   //
 3382   // The call instruction in the code either
  // - Branches directly to a compiled method if the offset is encodable in the instruction.
 3384   // - Branches to the trampoline stub if the offset to the compiled method is not encodable.
 3385   // - Branches to the compiled_to_interp stub if the target is interpreted.
 3386   //
  // In addition, there are three relocations from the loads to the constants in
  // the constant section.
 3389   //
  // The use of r1 (comp_to_interp stub) vs. r2 (trampoline stub) makes it
  // possible to distinguish the two stubs.
 3391   enc_class enc_java_static_call(method meth) %{
 3392 
 3393     C2_MacroAssembler _masm(&cbuf);
 3394     address entry_point = (address)$meth$$method;
 3395 
 3396     if (!_method) {
 3397       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
 3398       emit_call_with_trampoline_stub(_masm, entry_point, relocInfo::runtime_call_type);
 3399     } else {
 3400       // Remember the offset not the address.
 3401       const int start_offset = __ offset();
 3402 
 3403       // The trampoline stub.
      // If no entry point is given, use the current pc as a dummy; the
      // entry point might be too far away for bl, and the bl gets patched
      // later anyway.
 3406       if (entry_point == 0) entry_point = __ pc();
 3407 
 3408       // Put the entry point as a constant into the constant pool.
 3409       const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
 3410       if (entry_point_toc_addr == NULL) {
 3411         ciEnv::current()->record_out_of_memory_failure();
 3412         return;
 3413       }
 3414       const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
 3415 
      // Emit the trampoline stub, which will be associated with the branch-and-link below.
 3417       CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
 3418       if (ciEnv::current()->failing()) { return; } // Code cache may be full.
 3419       int method_index = resolved_method_index(cbuf);
 3420       __ relocate(_optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 3421                   : static_call_Relocation::spec(method_index));
 3422 
 3423       // The real call.
 3424       // Note: At this point we do not have the address of the trampoline
 3425       // stub, and the entry point might be too far away for bl, so __ pc()
 3426       // serves as dummy and the bl will be patched later.
 3427       cbuf.set_insts_mark();
 3428       __ bl(__ pc());  // Emits a relocation.
 3429 
 3430       // The stub for call to interpreter.
 3431       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 3432       if (stub == NULL) {
 3433         ciEnv::current()->record_failure("CodeCache is full");
 3434         return;
 3435       }
 3436     }
 3437   %}
 3438 
 3439   // Second node of expanded dynamic call - the call.
 3440   enc_class enc_java_dynamic_call_sched(method meth) %{
 3441 
 3442     C2_MacroAssembler _masm(&cbuf);
 3443 
 3444     if (!ra_->C->output()->in_scratch_emit_size()) {
 3445       // Create a call trampoline stub for the given method.
 3446       const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
 3447       const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
 3448       if (entry_point_const == NULL) {
 3449         ciEnv::current()->record_out_of_memory_failure();
 3450         return;
 3451       }
 3452       const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
 3453       CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());
 3454       if (ra_->C->env()->failing()) { return; } // Code cache may be full.
 3455 
 3456       // Build relocation at call site with ic position as data.
 3457       assert((_load_ic_hi_node != NULL && _load_ic_node == NULL) ||
 3458              (_load_ic_hi_node == NULL && _load_ic_node != NULL),
 3459              "must have one, but can't have both");
 3460       assert((_load_ic_hi_node != NULL && _load_ic_hi_node->_cbuf_insts_offset != -1) ||
 3461              (_load_ic_node != NULL    && _load_ic_node->_cbuf_insts_offset != -1),
 3462              "must contain instruction offset");
 3463       const int virtual_call_oop_addr_offset = _load_ic_hi_node != NULL
 3464         ? _load_ic_hi_node->_cbuf_insts_offset
 3465         : _load_ic_node->_cbuf_insts_offset;
 3466       const address virtual_call_oop_addr = __ addr_at(virtual_call_oop_addr_offset);
 3467       assert(MacroAssembler::is_load_const_from_method_toc_at(virtual_call_oop_addr),
 3468              "should be load from TOC");
 3469       int method_index = resolved_method_index(cbuf);
 3470       __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index));
 3471     }
 3472 
    // At this point we do not have the address of the trampoline stub,
    // and the entry point might be too far away for bl, so __ pc() serves
    // as a dummy and the bl will be patched later.
 3476     __ bl((address) __ pc());
 3477   %}
 3478 
 3479   // postalloc expand emitter for virtual calls.
 3480   enc_class postalloc_expand_java_dynamic_call_sched(method meth, iRegLdst toc) %{
 3481 
 3482     // Create the nodes for loading the IC from the TOC.
 3483     loadConLNodesTuple loadConLNodes_IC =
 3484       loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong)Universe::non_oop_word()),
 3485                                 OptoReg::Name(R19_H_num), OptoReg::Name(R19_num));
 3486 
 3487     // Create the call node.
 3488     CallDynamicJavaDirectSchedNode *call = new CallDynamicJavaDirectSchedNode();
 3489     call->_method_handle_invoke = _method_handle_invoke;
 3490     call->_vtable_index      = _vtable_index;
 3491     call->_method            = _method;
 3492     call->_optimized_virtual = _optimized_virtual;
 3493     call->_tf                = _tf;
 3494     call->_entry_point       = _entry_point;
 3495     call->_cnt               = _cnt;
 3496     call->_guaranteed_safepoint = true;
 3497     call->_oop_map           = _oop_map;
 3498     call->_jvms              = _jvms;
 3499     call->_jvmadj            = _jvmadj;
 3500     call->_in_rms            = _in_rms;
 3501     call->_nesting           = _nesting;
 3502     call->_override_symbolic_info = _override_symbolic_info;
 3503 
 3504     // New call needs all inputs of old call.
 3505     // Req...
 3506     for (uint i = 0; i < req(); ++i) {
      // The expanded node does not need the toc input any more.
      // Add the inline cache constant here instead. This expresses that the
      // register of the inline cache must be live at the call.
      // Otherwise we would have to adapt the JVMState by -1.
 3511       if (i == mach_constant_base_node_input()) {
 3512         call->add_req(loadConLNodes_IC._last);
 3513       } else {
 3514         call->add_req(in(i));
 3515       }
 3516     }
 3517     // ...as well as prec
 3518     for (uint i = req(); i < len(); ++i) {
 3519       call->add_prec(in(i));
 3520     }
 3521 
 3522     // Remember nodes loading the inline cache into r19.
 3523     call->_load_ic_hi_node = loadConLNodes_IC._large_hi;
 3524     call->_load_ic_node    = loadConLNodes_IC._small;
 3525 
 3526     // Operands for new nodes.
 3527     call->_opnds[0] = _opnds[0];
 3528     call->_opnds[1] = _opnds[1];
 3529 
 3530     // Only the inline cache is associated with a register.
 3531     assert(Matcher::inline_cache_reg() == OptoReg::Name(R19_num), "ic reg should be R19");
 3532 
 3533     // Push new nodes.
 3534     if (loadConLNodes_IC._large_hi) nodes->push(loadConLNodes_IC._large_hi);
 3535     if (loadConLNodes_IC._last)     nodes->push(loadConLNodes_IC._last);
 3536     nodes->push(call);
 3537   %}
 3538 
  // Compound version of the dynamic call.
  // Toc is only passed so that it can be used in the ins_encode statement.
  // In the code we have to use $constanttablebase.
 3542   enc_class enc_java_dynamic_call(method meth, iRegLdst toc) %{
 3543     C2_MacroAssembler _masm(&cbuf);
 3544     int start_offset = __ offset();
 3545 
 3546     Register Rtoc = (ra_) ? $constanttablebase : R2_TOC;
 3547 
 3548     int vtable_index = this->_vtable_index;
 3549     if (vtable_index < 0) {
 3550       // Must be invalid_vtable_index, not nonvirtual_vtable_index.
 3551       assert(vtable_index == Method::invalid_vtable_index, "correct sentinel value");
 3552       Register ic_reg = as_Register(Matcher::inline_cache_reg_encode());
 3553 
 3554       // Virtual call relocation will point to ic load.
 3555       address virtual_call_meta_addr = __ pc();
 3556       // Load a clear inline cache.
 3557       AddressLiteral empty_ic((address) Universe::non_oop_word());
 3558       bool success = __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc, /*fixed_size*/ true);
 3559       if (!success) {
 3560         ciEnv::current()->record_out_of_memory_failure();
 3561         return;
 3562       }
 3563       // CALL to fixup routine.  Fixup routine uses ScopeDesc info
 3564       // to determine who we intended to call.
 3565       __ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
 3566       emit_call_with_trampoline_stub(_masm, (address)$meth$$method, relocInfo::none);
 3567       assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
 3568              "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
 3569     } else {
 3570       assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
      // Go through the vtable. Get the receiver klass; the receiver has
      // already been null-checked. If we go through a C2I adapter, the
      // interpreter expects the method in R19_method.
 3574 
 3575       __ load_klass(R11_scratch1, R3);
 3576 
 3577       int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes();
 3578       int v_off = entry_offset + vtableEntry::method_offset_in_bytes();
 3579       __ li(R19_method, v_off);
 3580       __ ldx(R19_method/*method*/, R19_method/*method offset*/, R11_scratch1/*class*/);
 3581       // NOTE: for vtable dispatches, the vtable entry will never be
 3582       // null. However it may very well end up in handle_wrong_method
 3583       // if the method is abstract for the particular class.
 3584       __ ld(R11_scratch1, in_bytes(Method::from_compiled_offset()), R19_method);
 3585       // Call target. Either compiled code or C2I adapter.
 3586       __ mtctr(R11_scratch1);
 3587       __ bctrl();
 3588       assert(((MachCallDynamicJavaNode*)this)->ret_addr_offset() == __ offset() - start_offset,
 3589              "Fix constant in ret_addr_offset(), expected %d", __ offset() - start_offset);
 3590     }
 3591   %}
 3592 
 3593   // a runtime call
 3594   enc_class enc_java_to_runtime_call (method meth) %{
 3595 
 3596     C2_MacroAssembler _masm(&cbuf);
 3597     const address start_pc = __ pc();
 3598 
 3599 #if defined(ABI_ELFv2)
 3600     address entry= !($meth$$method) ? NULL : (address)$meth$$method;
 3601     __ call_c(entry, relocInfo::runtime_call_type);
 3602 #else
 3603     // The function we're going to call.
 3604     FunctionDescriptor fdtemp;
 3605     const FunctionDescriptor* fd = !($meth$$method) ? &fdtemp : (FunctionDescriptor*)$meth$$method;
 3606 
 3607     Register Rtoc = R12_scratch2;
 3608     // Calculate the method's TOC.
 3609     __ calculate_address_from_global_toc(Rtoc, __ method_toc());
    // Put entry, env and toc into the constant pool; this needs up to 3 constant
    // pool entries. call_c_using_toc will optimize the call.
 3612     bool success = __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
 3613     if (!success) {
 3614       ciEnv::current()->record_out_of_memory_failure();
 3615       return;
 3616     }
 3617 #endif
 3618 
 3619     // Check the ret_addr_offset.
 3620     assert(((MachCallRuntimeNode*)this)->ret_addr_offset() ==  __ last_calls_return_pc() - start_pc,
 3621            "Fix constant in ret_addr_offset()");
 3622   %}
 3623 
 3624   // Move to ctr for leaf call.
  // This enc_class is needed so that the scheduler gets the proper
  // input mapping for latency computation.
 3627   enc_class enc_leaf_call_mtctr(iRegLsrc src) %{
 3628     C2_MacroAssembler _masm(&cbuf);
 3629     __ mtctr($src$$Register);
 3630   %}
 3631 
 3632   // Postalloc expand emitter for runtime leaf calls.
 3633   enc_class postalloc_expand_java_to_runtime_call(method meth, iRegLdst toc) %{
 3634     loadConLNodesTuple loadConLNodes_Entry;
 3635 #if defined(ABI_ELFv2)
 3636     jlong entry_address = (jlong) this->entry_point();
 3637     assert(entry_address, "need address here");
 3638     loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
 3639                                                     OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 3640 #else
 3641     // Get the struct that describes the function we are about to call.
 3642     FunctionDescriptor* fd = (FunctionDescriptor*) this->entry_point();
 3643     assert(fd, "need fd here");
 3644     jlong entry_address = (jlong) fd->entry();
 3645     // new nodes
 3646     loadConLNodesTuple loadConLNodes_Env;
 3647     loadConLNodesTuple loadConLNodes_Toc;
 3648 
 3649     // Create nodes and operands for loading the entry point.
 3650     loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
 3651                                                     OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 3652 
 3653 
 3654     // Create nodes and operands for loading the env pointer.
 3655     if (fd->env() != NULL) {
 3656       loadConLNodes_Env = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->env()),
 3657                                                     OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
 3658     } else {
 3659       loadConLNodes_Env._large_hi = NULL;
 3660       loadConLNodes_Env._large_lo = NULL;
 3661       loadConLNodes_Env._small    = NULL;
 3662       loadConLNodes_Env._last = new loadConL16Node();
 3663       loadConLNodes_Env._last->_opnds[0] = new iRegLdstOper();
 3664       loadConLNodes_Env._last->_opnds[1] = new immL16Oper(0);
 3665       ra_->set_pair(loadConLNodes_Env._last->_idx, OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
 3666     }
 3667 
    // Create nodes and operands for loading the TOC pointer.
 3669     loadConLNodes_Toc = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->toc()),
 3670                                                   OptoReg::Name(R2_H_num), OptoReg::Name(R2_num));
 3671 #endif // ABI_ELFv2
 3672     // mtctr node
 3673     MachNode *mtctr = new CallLeafDirect_mtctrNode();
 3674 
 3675     assert(loadConLNodes_Entry._last != NULL, "entry must exist");
 3676     mtctr->add_req(0, loadConLNodes_Entry._last);
 3677 
 3678     mtctr->_opnds[0] = new iRegLdstOper();
 3679     mtctr->_opnds[1] = new iRegLdstOper();
 3680 
 3681     // call node
 3682     MachCallLeafNode *call = new CallLeafDirectNode();
 3683 
 3684     call->_opnds[0] = _opnds[0];
 3685     call->_opnds[1] = new methodOper((intptr_t) entry_address); // May get set later.
 3686 
 3687     // Make the new call node look like the old one.
 3688     call->_name        = _name;
 3689     call->_tf          = _tf;
 3690     call->_entry_point = _entry_point;
 3691     call->_cnt         = _cnt;
 3692     call->_guaranteed_safepoint = false;
 3693     call->_oop_map     = _oop_map;
 3694     guarantee(!_jvms, "You must clone the jvms and adapt the offsets by fix_jvms().");
 3695     call->_jvms        = NULL;
 3696     call->_jvmadj      = _jvmadj;
 3697     call->_in_rms      = _in_rms;
 3698     call->_nesting     = _nesting;
 3699 
 3700     // New call needs all inputs of old call.
 3701     // Req...
 3702     for (uint i = 0; i < req(); ++i) {
 3703       if (i != mach_constant_base_node_input()) {
 3704         call->add_req(in(i));
 3705       }
 3706     }
 3707 
    // These must be required edges, as the registers are live up to
    // the call. Otherwise the constants are handled as kills.
 3710     call->add_req(mtctr);
 3711 #if !defined(ABI_ELFv2)
 3712     call->add_req(loadConLNodes_Env._last);
 3713     call->add_req(loadConLNodes_Toc._last);
 3714 #endif
 3715 
 3716     // ...as well as prec
 3717     for (uint i = req(); i < len(); ++i) {
 3718       call->add_prec(in(i));
 3719     }
 3720 
 3721     // registers
 3722     ra_->set1(mtctr->_idx, OptoReg::Name(SR_CTR_num));
 3723 
 3724     // Insert the new nodes.
 3725     if (loadConLNodes_Entry._large_hi) nodes->push(loadConLNodes_Entry._large_hi);
 3726     if (loadConLNodes_Entry._last)     nodes->push(loadConLNodes_Entry._last);
 3727 #if !defined(ABI_ELFv2)
 3728     if (loadConLNodes_Env._large_hi)   nodes->push(loadConLNodes_Env._large_hi);
 3729     if (loadConLNodes_Env._last)       nodes->push(loadConLNodes_Env._last);
 3730     if (loadConLNodes_Toc._large_hi)   nodes->push(loadConLNodes_Toc._large_hi);
 3731     if (loadConLNodes_Toc._last)       nodes->push(loadConLNodes_Toc._last);
 3732 #endif
 3733     nodes->push(mtctr);
 3734     nodes->push(call);
 3735   %}
 3736 %}
 3737 
 3738 //----------FRAME--------------------------------------------------------------
 3739 // Definition of frame structure and management information.
 3740 
 3741 frame %{
 3742   // These two registers define part of the calling convention between
 3743   // compiled code and the interpreter.
 3744 
 3745   // Inline Cache Register or method for I2C.
 3746   inline_cache_reg(R19); // R19_method
 3747 
 3748   // Optional: name the operand used by cisc-spilling to access
 3749   // [stack_pointer + offset].
 3750   cisc_spilling_operand_name(indOffset);
 3751 
 3752   // Number of stack slots consumed by a Monitor enter.
 3753   sync_stack_slots((frame::jit_monitor_size / VMRegImpl::stack_slot_size));
 3754 
 3755   // Compiled code's Frame Pointer.
 3756   frame_pointer(R1); // R1_SP
 3757 
  // The interpreter stores its frame pointer in a register which is
  // stored to the stack by I2C adapters. I2C adapters convert from
  // interpreted Java to compiled Java.
 3761   //
 3762   // R14_state holds pointer to caller's cInterpreter.
 3763   interpreter_frame_pointer(R14); // R14_state
 3764 
 3765   stack_alignment(frame::alignment_in_bytes);
 3766 
 3767   // Number of outgoing stack slots killed above the
 3768   // out_preserve_stack_slots for calls to C. Supports the var-args
 3769   // backing area for register parms.
 3770   //
 3771   varargs_C_out_slots_killed(((frame::abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size));
 3772 
 3773   // The after-PROLOG location of the return address. Location of
 3774   // return address specifies a type (REG or STACK) and a number
 3775   // representing the register number (i.e. - use a register name) or
 3776   // stack slot.
 3777   //
 3778   // A: Link register is stored in stack slot ...
 3779   // M:  ... but it's in the caller's frame according to PPC-64 ABI.
 3780   // J: Therefore, we make sure that the link register is also in R11_scratch1
 3781   //    at the end of the prolog.
 3782   // B: We use R20, now.
 3783   //return_addr(REG R20);
 3784 
 3785   // G: After reading the comments made by all the luminaries on their
 3786   //    failure to tell the compiler where the return address really is,
  //    I hardly dare to try myself.  However, I'm convinced it's in slot
  //    4, which apparently works and saves us some spills.
 3789   return_addr(STACK 4);
 3790 
 3791   // Location of native (C/C++) and interpreter return values. This
 3792   // is specified to be the same as Java. In the 32-bit VM, long
 3793   // values are actually returned from native calls in O0:O1 and
 3794   // returned to the interpreter in I0:I1. The copying to and from
 3795   // the register pairs is done by the appropriate call and epilog
 3796   // opcodes. This simplifies the register allocator.
 3797   c_return_value %{
 3798     assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
 3799             (ideal_reg == Op_RegN && CompressedOops::base() == NULL && CompressedOops::shift() == 0),
 3800             "only return normal values");
 3801     // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
 3802     static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,   R3_num,   R3_num,   F1_num,   F1_num,   R3_num };
 3803     static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
 3804     return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
 3805   %}
 3806 
 3807   // Location of compiled Java return values.  Same as C
 3808   return_value %{
 3809     assert((ideal_reg >= Op_RegI && ideal_reg <= Op_RegL) ||
 3810             (ideal_reg == Op_RegN && CompressedOops::base() == NULL && CompressedOops::shift() == 0),
 3811             "only return normal values");
 3812     // enum names from opcodes.hpp:    Op_Node Op_Set Op_RegN       Op_RegI       Op_RegP       Op_RegF       Op_RegD       Op_RegL
 3813     static int typeToRegLo[Op_RegL+1] = { 0,   0,     R3_num,   R3_num,   R3_num,   F1_num,   F1_num,   R3_num };
 3814     static int typeToRegHi[Op_RegL+1] = { 0,   0,     OptoReg::Bad, R3_H_num, R3_H_num, OptoReg::Bad, F1_H_num, R3_H_num };
 3815     return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]);
 3816   %}
 3817 %}
 3818 
 3819 
 3820 //----------ATTRIBUTES---------------------------------------------------------
 3821 
 3822 //----------Operand Attributes-------------------------------------------------
 3823 op_attrib op_cost(1);          // Required cost attribute.
 3824 
 3825 //----------Instruction Attributes---------------------------------------------
 3826 
// Cost attribute (required).
 3828 ins_attrib ins_cost(DEFAULT_COST);
 3829 
 3830 // Is this instruction a non-matching short branch variant of some
 3831 // long branch? Not required.
 3832 ins_attrib ins_short_branch(0);
 3833 
 3834 ins_attrib ins_is_TrapBasedCheckNode(true);
 3835 
 3836 // Number of constants.
 3837 // This instruction uses the given number of constants
 3838 // (optional attribute).
// This is needed to determine in time whether the constant pool will
// exceed 4000 entries: before postalloc_expand, the overall number of
// constants is determined from this attribute. It is also used to compute
// the constant pool size in Output().
 3843 ins_attrib ins_num_consts(0);
 3844 
 3845 // Required alignment attribute (must be a power of 2) specifies the
 3846 // alignment that some part of the instruction (not necessarily the
 3847 // start) requires. If > 1, a compute_padding() function must be
 3848 // provided for the instruction.
 3849 ins_attrib ins_alignment(1);
 3850 
 3851 // Enforce/prohibit rematerializations.
 3852 // - If an instruction is attributed with 'ins_cannot_rematerialize(true)'
 3853 //   then rematerialization of that instruction is prohibited and the
 3854 //   instruction's value will be spilled if necessary.
//   This causes MachNode::rematerialize() to return false.
 3856 // - If an instruction is attributed with 'ins_should_rematerialize(true)'
 3857 //   then rematerialization should be enforced and a copy of the instruction
 3858 //   should be inserted if possible; rematerialization is not guaranteed.
 3859 //   Note: this may result in rematerializations in front of every use.
//   This allows MachNode::rematerialize() to return true.
 3861 // (optional attribute)
 3862 ins_attrib ins_cannot_rematerialize(false);
 3863 ins_attrib ins_should_rematerialize(false);
 3864 
 3865 // Instruction has variable size depending on alignment.
 3866 ins_attrib ins_variable_size_depending_on_alignment(false);
 3867 
 3868 // Instruction is a nop.
 3869 ins_attrib ins_is_nop(false);
 3870 
 3871 // Instruction is mapped to a MachIfFastLock node (instead of MachFastLock).
 3872 ins_attrib ins_use_mach_if_fast_lock_node(false);
 3873 
 3874 // Field for the toc offset of a constant.
 3875 //
// This is needed if the toc offset is not encodable as an immediate in
// the PPC load instruction. If so, the upper (hi) bits of the offset are
// added to the toc, and the load is then performed with the remaining
// immediate displacement. With postalloc expand, we get two nodes that
// require the same offset but which don't know about each other. The offset
// is only known when the constant is added to the constant pool during
// emitting. It is generated by the 'hi'-node that adds the upper bits, and
// saved in this field. The 'lo'-node has a link to the 'hi'-node and reads
// the offset from there when it gets encoded.
 3885 ins_attrib ins_field_const_toc_offset(0);
 3886 ins_attrib ins_field_const_toc_offset_hi_node(0);
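// (See postalloc_expand_load_ptr_constant above: the 'lo'-node links to the
// 'hi'-node via _const_toc_offset_hi_node, and the 'hi'-node records the
// offset in _const_toc_offset when the constant is emitted.)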
 3887 
// A field that can hold the instruction's offset in the code buffer.
 3889 // Set in the nodes emitter.
 3890 ins_attrib ins_field_cbuf_insts_offset(-1);
 3891 
 3892 // Fields for referencing a call's load-IC-node.
// If the toc offset cannot be encoded as an immediate in a load, we
 3894 // use two nodes.
 3895 ins_attrib ins_field_load_ic_hi_node(0);
 3896 ins_attrib ins_field_load_ic_node(0);
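// (See postalloc_expand_java_dynamic_call_sched above: the call node remembers
// the nodes loading the inline cache in these fields, so that
// enc_java_dynamic_call_sched can read their _cbuf_insts_offset and place the
// virtual_call relocation at the IC load.)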
 3897 
 3898 //----------OPERANDS-----------------------------------------------------------
 3899 // Operand definitions must precede instruction definitions for correct
// parsing in the ADLC because operands constitute user-defined types
 3901 // which are used in instruction definitions.
 3902 //
 3903 // Formats are generated automatically for constants and base registers.
 3904 
 3905 operand vecX() %{
 3906   constraint(ALLOC_IN_RC(vs_reg));
 3907   match(VecX);
 3908 
 3909   format %{ %}
 3910   interface(REG_INTER);
 3911 %}
 3912 
 3913 //----------Simple Operands----------------------------------------------------
 3914 // Immediate Operands
 3915 
 3916 // Integer Immediate: 32-bit
 3917 operand immI() %{
 3918   match(ConI);
 3919   op_cost(40);
 3920   format %{ %}
 3921   interface(CONST_INTER);
 3922 %}
 3923 
 3924 operand immI8() %{
 3925   predicate(Assembler::is_simm(n->get_int(), 8));
 3926   op_cost(0);
 3927   match(ConI);
 3928   format %{ %}
 3929   interface(CONST_INTER);
 3930 %}
 3931 
 3932 // Integer Immediate: 16-bit
 3933 operand immI16() %{
 3934   predicate(Assembler::is_simm(n->get_int(), 16));
 3935   op_cost(0);
 3936   match(ConI);
 3937   format %{ %}
 3938   interface(CONST_INTER);
 3939 %}
 3940 
 3941 // Integer Immediate: 32-bit, where lowest 16 bits are 0x0000.
 3942 operand immIhi16() %{
 3943   predicate(((n->get_int() & 0xffff0000) != 0) && ((n->get_int() & 0xffff) == 0));
 3944   match(ConI);
 3945   op_cost(0);
 3946   format %{ %}
 3947   interface(CONST_INTER);
 3948 %}
 3949 
 3950 // Integer Immediate: 32-bit immediate for prefixed addi and load/store.
 3951 operand immI32() %{
 3952   predicate(PowerArchitecturePPC64 >= 10);
 3953   op_cost(0);
 3954   match(ConI);
 3955   format %{ %}
 3956   interface(CONST_INTER);
 3957 %}
 3958 
 3959 operand immInegpow2() %{
 3960   predicate(is_power_of_2(-(juint)(n->get_int())));
 3961   match(ConI);
 3962   op_cost(0);
 3963   format %{ %}
 3964   interface(CONST_INTER);
 3965 %}
 3966 
 3967 operand immIpow2minus1() %{
 3968   predicate(is_power_of_2((juint)(n->get_int()) + 1u));
 3969   match(ConI);
 3970   op_cost(0);
 3971   format %{ %}
 3972   interface(CONST_INTER);
 3973 %}
 3974 
 3975 operand immIpowerOf2() %{
 3976   predicate(is_power_of_2((juint)(n->get_int())));
 3977   match(ConI);
 3978   op_cost(0);
 3979   format %{ %}
 3980   interface(CONST_INTER);
 3981 %}
 3982 
 3983 // Unsigned Integer Immediate: the values 0-31
 3984 operand uimmI5() %{
 3985   predicate(Assembler::is_uimm(n->get_int(), 5));
 3986   match(ConI);
 3987   op_cost(0);
 3988   format %{ %}
 3989   interface(CONST_INTER);
 3990 %}
 3991 
 3992 // Unsigned Integer Immediate: 6-bit
 3993 operand uimmI6() %{
 3994   predicate(Assembler::is_uimm(n->get_int(), 6));
 3995   match(ConI);
 3996   op_cost(0);
 3997   format %{ %}
 3998   interface(CONST_INTER);
 3999 %}
 4000 
// Unsigned Integer Immediate: 6-bit int, greater than or equal to 32
 4002 operand uimmI6_ge32() %{
 4003   predicate(Assembler::is_uimm(n->get_int(), 6) && n->get_int() >= 32);
 4004   match(ConI);
 4005   op_cost(0);
 4006   format %{ %}
 4007   interface(CONST_INTER);
 4008 %}
 4009 
 4010 // Unsigned Integer Immediate: 15-bit
 4011 operand uimmI15() %{
 4012   predicate(Assembler::is_uimm(n->get_int(), 15));
 4013   match(ConI);
 4014   op_cost(0);
 4015   format %{ %}
 4016   interface(CONST_INTER);
 4017 %}
 4018 
 4019 // Unsigned Integer Immediate: 16-bit
 4020 operand uimmI16() %{
 4021   predicate(Assembler::is_uimm(n->get_int(), 16));
 4022   match(ConI);
 4023   op_cost(0);
 4024   format %{ %}
 4025   interface(CONST_INTER);
 4026 %}
 4027 
 4028 // constant 'int 0'.
 4029 operand immI_0() %{
 4030   predicate(n->get_int() == 0);
 4031   match(ConI);
 4032   op_cost(0);
 4033   format %{ %}
 4034   interface(CONST_INTER);
 4035 %}
 4036 
 4037 // constant 'int 1'.
 4038 operand immI_1() %{
 4039   predicate(n->get_int() == 1);
 4040   match(ConI);
 4041   op_cost(0);
 4042   format %{ %}
 4043   interface(CONST_INTER);
 4044 %}
 4045 
 4046 // constant 'int -1'.
 4047 operand immI_minus1() %{
 4048   predicate(n->get_int() == -1);
 4049   match(ConI);
 4050   op_cost(0);
 4051   format %{ %}
 4052   interface(CONST_INTER);
 4053 %}
 4054 
 4055 // int value 16.
 4056 operand immI_16() %{
 4057   predicate(n->get_int() == 16);
 4058   match(ConI);
 4059   op_cost(0);
 4060   format %{ %}
 4061   interface(CONST_INTER);
 4062 %}
 4063 
 4064 // int value 24.
 4065 operand immI_24() %{
 4066   predicate(n->get_int() == 24);
 4067   match(ConI);
 4068   op_cost(0);
 4069   format %{ %}
 4070   interface(CONST_INTER);
 4071 %}
 4072 
 4073 // Compressed oops constants
 4074 // Pointer Immediate
 4075 operand immN() %{
 4076   match(ConN);
 4077 
 4078   op_cost(10);
 4079   format %{ %}
 4080   interface(CONST_INTER);
 4081 %}
 4082 
 4083 // NULL Pointer Immediate
 4084 operand immN_0() %{
 4085   predicate(n->get_narrowcon() == 0);
 4086   match(ConN);
 4087 
 4088   op_cost(0);
 4089   format %{ %}
 4090   interface(CONST_INTER);
 4091 %}
 4092 
 4093 // Compressed klass constants
 4094 operand immNKlass() %{
 4095   match(ConNKlass);
 4096 
 4097   op_cost(0);
 4098   format %{ %}
 4099   interface(CONST_INTER);
 4100 %}
 4101 
 4102 // This operand can be used to avoid matching of an instruct
// with a chain rule.
 4104 operand immNKlass_NM() %{
 4105   match(ConNKlass);
 4106   predicate(false);
 4107   op_cost(0);
 4108   format %{ %}
 4109   interface(CONST_INTER);
 4110 %}
 4111 
 4112 // Pointer Immediate: 64-bit
 4113 operand immP() %{
 4114   match(ConP);
 4115   op_cost(0);
 4116   format %{ %}
 4117   interface(CONST_INTER);
 4118 %}
 4119 
 4120 // Operand to avoid match of loadConP.
 4121 // This operand can be used to avoid matching of an instruct
// with a chain rule.
 4123 operand immP_NM() %{
 4124   match(ConP);
 4125   predicate(false);
 4126   op_cost(0);
 4127   format %{ %}
 4128   interface(CONST_INTER);
 4129 %}
 4130 
// constant 'pointer 0'.
 4132 operand immP_0() %{
 4133   predicate(n->get_ptr() == 0);
 4134   match(ConP);
 4135   op_cost(0);
 4136   format %{ %}
 4137   interface(CONST_INTER);
 4138 %}
 4139 
 4140 // pointer 0x0 or 0x1
 4141 operand immP_0or1() %{
 4142   predicate((n->get_ptr() == 0) || (n->get_ptr() == 1));
 4143   match(ConP);
 4144   op_cost(0);
 4145   format %{ %}
 4146   interface(CONST_INTER);
 4147 %}
 4148 
 4149 operand immL() %{
 4150   match(ConL);
 4151   op_cost(40);
 4152   format %{ %}
 4153   interface(CONST_INTER);
 4154 %}
 4155 
 4156 operand immLmax30() %{
 4157   predicate((n->get_long() <= 30));
 4158   match(ConL);
 4159   op_cost(0);
 4160   format %{ %}
 4161   interface(CONST_INTER);
 4162 %}
 4163 
 4164 // Long Immediate: 16-bit
 4165 operand immL16() %{
 4166   predicate(Assembler::is_simm(n->get_long(), 16));
 4167   match(ConL);
 4168   op_cost(0);
 4169   format %{ %}
 4170   interface(CONST_INTER);
 4171 %}
 4172 
 4173 // Long Immediate: 16-bit, 4-aligned
 4174 operand immL16Alg4() %{
 4175   predicate(Assembler::is_simm(n->get_long(), 16) && ((n->get_long() & 0x3) == 0));
 4176   match(ConL);
 4177   op_cost(0);
 4178   format %{ %}
 4179   interface(CONST_INTER);
 4180 %}
 4181 
 4182 // Long Immediate: 32-bit, where lowest 16 bits are 0x0000.
 4183 operand immL32hi16() %{
 4184   predicate(Assembler::is_simm(n->get_long(), 32) && ((n->get_long() & 0xffffL) == 0L));
 4185   match(ConL);
 4186   op_cost(0);
 4187   format %{ %}
 4188   interface(CONST_INTER);
 4189 %}
 4190 
 4191 // Long Immediate: 32-bit
 4192 operand immL32() %{
 4193   predicate(Assembler::is_simm(n->get_long(), 32));
 4194   match(ConL);
 4195   op_cost(0);
 4196   format %{ %}
 4197   interface(CONST_INTER);
 4198 %}
 4199 
 4200 // Long Immediate: 34-bit, immediate field in prefixed addi and load/store.
 4201 operand immL34() %{
 4202   predicate(PowerArchitecturePPC64 >= 10 && Assembler::is_simm(n->get_long(), 34));
 4203   match(ConL);
 4204   op_cost(0);
 4205   format %{ %}
 4206   interface(CONST_INTER);
 4207 %}
 4208 
// Long Immediate: 64-bit, where the highest 16 bits are non-zero and all lower 48 bits are 0.
 4210 operand immLhighest16() %{
 4211   predicate((n->get_long() & 0xffff000000000000L) != 0L && (n->get_long() & 0x0000ffffffffffffL) == 0L);
 4212   match(ConL);
 4213   op_cost(0);
 4214   format %{ %}
 4215   interface(CONST_INTER);
 4216 %}
 4217 
 4218 operand immLnegpow2() %{
 4219   predicate(is_power_of_2(-(julong)(n->get_long())));
 4220   match(ConL);
 4221   op_cost(0);
 4222   format %{ %}
 4223   interface(CONST_INTER);
 4224 %}
 4225 
 4226 operand immLpow2minus1() %{
 4227   predicate(is_power_of_2((julong)(n->get_long()) + 1ull));
 4228   match(ConL);
 4229   op_cost(0);
 4230   format %{ %}
 4231   interface(CONST_INTER);
 4232 %}
 4233 
 4234 // constant 'long 0'.
 4235 operand immL_0() %{
 4236   predicate(n->get_long() == 0L);
 4237   match(ConL);
 4238   op_cost(0);
 4239   format %{ %}
 4240   interface(CONST_INTER);
 4241 %}
 4242 
// constant 'long -1'.
 4244 operand immL_minus1() %{
 4245   predicate(n->get_long() == -1L);
 4246   match(ConL);
 4247   op_cost(0);
 4248   format %{ %}
 4249   interface(CONST_INTER);
 4250 %}
 4251 
 4252 // Long Immediate: low 32-bit mask
 4253 operand immL_32bits() %{
 4254   predicate(n->get_long() == 0xFFFFFFFFL);
 4255   match(ConL);
 4256   op_cost(0);
 4257   format %{ %}
 4258   interface(CONST_INTER);
 4259 %}
 4260 
 4261 // Unsigned Long Immediate: 16-bit
 4262 operand uimmL16() %{
 4263   predicate(Assembler::is_uimm(n->get_long(), 16));
 4264   match(ConL);
 4265   op_cost(0);
 4266   format %{ %}
 4267   interface(CONST_INTER);
 4268 %}
 4269 
 4270 // Float Immediate
 4271 operand immF() %{
 4272   match(ConF);
 4273   op_cost(40);
 4274   format %{ %}
 4275   interface(CONST_INTER);
 4276 %}
 4277 
 4278 // Float Immediate: +0.0f.
 4279 operand immF_0() %{
 4280   predicate(jint_cast(n->getf()) == 0);
 4281   match(ConF);
 4282 
 4283   op_cost(0);
 4284   format %{ %}
 4285   interface(CONST_INTER);
 4286 %}
 4287 
 4288 // Double Immediate
 4289 operand immD() %{
 4290   match(ConD);
 4291   op_cost(40);
 4292   format %{ %}
 4293   interface(CONST_INTER);
 4294 %}
 4295 
 4296 // Double Immediate: +0.0d.
 4297 operand immD_0() %{
 4298   predicate(jlong_cast(n->getd()) == 0);
 4299   match(ConD);
 4300 
 4301   op_cost(0);
 4302   format %{ %}
 4303   interface(CONST_INTER);
 4304 %}
 4305 
 4306 // Integer Register Operands
 4307 // Integer Destination Register
 4308 // See definition of reg_class bits32_reg_rw.
 4309 operand iRegIdst() %{
 4310   constraint(ALLOC_IN_RC(bits32_reg_rw));
 4311   match(RegI);
 4312   match(rscratch1RegI);
 4313   match(rscratch2RegI);
 4314   match(rarg1RegI);
 4315   match(rarg2RegI);
 4316   match(rarg3RegI);
 4317   match(rarg4RegI);
 4318   format %{ %}
 4319   interface(REG_INTER);
 4320 %}
 4321 
 4322 // Integer Source Register
 4323 // See definition of reg_class bits32_reg_ro.
 4324 operand iRegIsrc() %{
 4325   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4326   match(RegI);
 4327   match(rscratch1RegI);
 4328   match(rscratch2RegI);
 4329   match(rarg1RegI);
 4330   match(rarg2RegI);
 4331   match(rarg3RegI);
 4332   match(rarg4RegI);
 4333   format %{ %}
 4334   interface(REG_INTER);
 4335 %}
 4336 
 4337 operand rscratch1RegI() %{
 4338   constraint(ALLOC_IN_RC(rscratch1_bits32_reg));
 4339   match(iRegIdst);
 4340   format %{ %}
 4341   interface(REG_INTER);
 4342 %}
 4343 
 4344 operand rscratch2RegI() %{
 4345   constraint(ALLOC_IN_RC(rscratch2_bits32_reg));
 4346   match(iRegIdst);
 4347   format %{ %}
 4348   interface(REG_INTER);
 4349 %}
 4350 
 4351 operand rarg1RegI() %{
 4352   constraint(ALLOC_IN_RC(rarg1_bits32_reg));
 4353   match(iRegIdst);
 4354   format %{ %}
 4355   interface(REG_INTER);
 4356 %}
 4357 
 4358 operand rarg2RegI() %{
 4359   constraint(ALLOC_IN_RC(rarg2_bits32_reg));
 4360   match(iRegIdst);
 4361   format %{ %}
 4362   interface(REG_INTER);
 4363 %}
 4364 
 4365 operand rarg3RegI() %{
 4366   constraint(ALLOC_IN_RC(rarg3_bits32_reg));
 4367   match(iRegIdst);
 4368   format %{ %}
 4369   interface(REG_INTER);
 4370 %}
 4371 
 4372 operand rarg4RegI() %{
 4373   constraint(ALLOC_IN_RC(rarg4_bits32_reg));
 4374   match(iRegIdst);
 4375   format %{ %}
 4376   interface(REG_INTER);
 4377 %}
 4378 
 4379 operand rarg1RegL() %{
 4380   constraint(ALLOC_IN_RC(rarg1_bits64_reg));
 4381   match(iRegLdst);
 4382   format %{ %}
 4383   interface(REG_INTER);
 4384 %}
 4385 
 4386 operand rarg2RegL() %{
 4387   constraint(ALLOC_IN_RC(rarg2_bits64_reg));
 4388   match(iRegLdst);
 4389   format %{ %}
 4390   interface(REG_INTER);
 4391 %}
 4392 
 4393 operand rarg3RegL() %{
 4394   constraint(ALLOC_IN_RC(rarg3_bits64_reg));
 4395   match(iRegLdst);
 4396   format %{ %}
 4397   interface(REG_INTER);
 4398 %}
 4399 
 4400 operand rarg4RegL() %{
 4401   constraint(ALLOC_IN_RC(rarg4_bits64_reg));
 4402   match(iRegLdst);
 4403   format %{ %}
 4404   interface(REG_INTER);
 4405 %}
 4406 
 4407 // Pointer Destination Register
 4408 // See definition of reg_class bits64_reg_rw.
 4409 operand iRegPdst() %{
 4410   constraint(ALLOC_IN_RC(bits64_reg_rw));
 4411   match(RegP);
 4412   match(rscratch1RegP);
 4413   match(rscratch2RegP);
 4414   match(rarg1RegP);
 4415   match(rarg2RegP);
 4416   match(rarg3RegP);
 4417   match(rarg4RegP);
 4418   format %{ %}
 4419   interface(REG_INTER);
 4420 %}
 4421 
 4422 // Pointer Destination Register
 4423 // Operand not using r11 and r12 (killed in epilog).
 4424 operand iRegPdstNoScratch() %{
 4425   constraint(ALLOC_IN_RC(bits64_reg_leaf_call));
 4426   match(RegP);
 4427   match(rarg1RegP);
 4428   match(rarg2RegP);
 4429   match(rarg3RegP);
 4430   match(rarg4RegP);
 4431   format %{ %}
 4432   interface(REG_INTER);
 4433 %}
 4434 
 4435 // Pointer Source Register
 4436 // See definition of reg_class bits64_reg_ro.
 4437 operand iRegPsrc() %{
 4438   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4439   match(RegP);
 4440   match(iRegPdst);
 4441   match(rscratch1RegP);
 4442   match(rscratch2RegP);
 4443   match(rarg1RegP);
 4444   match(rarg2RegP);
 4445   match(rarg3RegP);
 4446   match(rarg4RegP);
 4447   match(threadRegP);
 4448   format %{ %}
 4449   interface(REG_INTER);
 4450 %}
 4451 
 4452 // Thread operand.
 4453 operand threadRegP() %{
 4454   constraint(ALLOC_IN_RC(thread_bits64_reg));
 4455   match(iRegPdst);
 4456   format %{ "R16" %}
 4457   interface(REG_INTER);
 4458 %}
 4459 
 4460 operand rscratch1RegP() %{
 4461   constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
 4462   match(iRegPdst);
 4463   format %{ "R11" %}
 4464   interface(REG_INTER);
 4465 %}
 4466 
 4467 operand rscratch2RegP() %{
 4468   constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
 4469   match(iRegPdst);
 4470   format %{ %}
 4471   interface(REG_INTER);
 4472 %}
 4473 
 4474 operand rarg1RegP() %{
 4475   constraint(ALLOC_IN_RC(rarg1_bits64_reg));
 4476   match(iRegPdst);
 4477   format %{ %}
 4478   interface(REG_INTER);
 4479 %}
 4480 
 4481 operand rarg2RegP() %{
 4482   constraint(ALLOC_IN_RC(rarg2_bits64_reg));
 4483   match(iRegPdst);
 4484   format %{ %}
 4485   interface(REG_INTER);
 4486 %}
 4487 
 4488 operand rarg3RegP() %{
 4489   constraint(ALLOC_IN_RC(rarg3_bits64_reg));
 4490   match(iRegPdst);
 4491   format %{ %}
 4492   interface(REG_INTER);
 4493 %}
 4494 
 4495 operand rarg4RegP() %{
 4496   constraint(ALLOC_IN_RC(rarg4_bits64_reg));
 4497   match(iRegPdst);
 4498   format %{ %}
 4499   interface(REG_INTER);
 4500 %}
 4501 
 4502 operand iRegNsrc() %{
 4503   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4504   match(RegN);
 4505   match(iRegNdst);
 4506 
 4507   format %{ %}
 4508   interface(REG_INTER);
 4509 %}
 4510 
 4511 operand iRegNdst() %{
 4512   constraint(ALLOC_IN_RC(bits32_reg_rw));
 4513   match(RegN);
 4514 
 4515   format %{ %}
 4516   interface(REG_INTER);
 4517 %}
 4518 
 4519 // Long Destination Register
 4520 // See definition of reg_class bits64_reg_rw.
 4521 operand iRegLdst() %{
 4522   constraint(ALLOC_IN_RC(bits64_reg_rw));
 4523   match(RegL);
 4524   match(rscratch1RegL);
 4525   match(rscratch2RegL);
 4526   format %{ %}
 4527   interface(REG_INTER);
 4528 %}
 4529 
 4530 // Long Source Register
 4531 // See definition of reg_class bits64_reg_ro.
 4532 operand iRegLsrc() %{
 4533   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4534   match(RegL);
 4535   match(iRegLdst);
 4536   match(rscratch1RegL);
 4537   match(rscratch2RegL);
 4538   format %{ %}
 4539   interface(REG_INTER);
 4540 %}
 4541 
 4542 // Special operand for ConvL2I.
 4543 operand iRegL2Isrc(iRegLsrc reg) %{
 4544   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4545   match(ConvL2I reg);
 4546   format %{ "ConvL2I($reg)" %}
 4547   interface(REG_INTER)
 4548 %}
 4549 
 4550 operand rscratch1RegL() %{
 4551   constraint(ALLOC_IN_RC(rscratch1_bits64_reg));
 4552   match(RegL);
 4553   format %{ %}
 4554   interface(REG_INTER);
 4555 %}
 4556 
 4557 operand rscratch2RegL() %{
 4558   constraint(ALLOC_IN_RC(rscratch2_bits64_reg));
 4559   match(RegL);
 4560   format %{ %}
 4561   interface(REG_INTER);
 4562 %}
 4563 
 4564 // Condition Code Flag Registers
 4565 operand flagsReg() %{
 4566   constraint(ALLOC_IN_RC(int_flags));
 4567   match(RegFlags);
 4568   format %{ %}
 4569   interface(REG_INTER);
 4570 %}
 4571 
 4572 operand flagsRegSrc() %{
 4573   constraint(ALLOC_IN_RC(int_flags_ro));
 4574   match(RegFlags);
 4575   match(flagsReg);
 4576   match(flagsRegCR0);
 4577   format %{ %}
 4578   interface(REG_INTER);
 4579 %}
 4580 
 4581 // Condition Code Flag Register CR0
 4582 operand flagsRegCR0() %{
 4583   constraint(ALLOC_IN_RC(int_flags_CR0));
 4584   match(RegFlags);
 4585   format %{ "CR0" %}
 4586   interface(REG_INTER);
 4587 %}
 4588 
 4589 operand flagsRegCR1() %{
 4590   constraint(ALLOC_IN_RC(int_flags_CR1));
 4591   match(RegFlags);
 4592   format %{ "CR1" %}
 4593   interface(REG_INTER);
 4594 %}
 4595 
 4596 operand flagsRegCR6() %{
 4597   constraint(ALLOC_IN_RC(int_flags_CR6));
 4598   match(RegFlags);
 4599   format %{ "CR6" %}
 4600   interface(REG_INTER);
 4601 %}
 4602 
 4603 operand regCTR() %{
 4604   constraint(ALLOC_IN_RC(ctr_reg));
 4605   // RegFlags should work. Introducing a RegSpecial type would cause a
 4606   // lot of changes.
 4607   match(RegFlags);
 4608   format %{"SR_CTR" %}
 4609   interface(REG_INTER);
 4610 %}
 4611 
 4612 operand regD() %{
 4613   constraint(ALLOC_IN_RC(dbl_reg));
 4614   match(RegD);
 4615   format %{ %}
 4616   interface(REG_INTER);
 4617 %}
 4618 
 4619 operand regF() %{
 4620   constraint(ALLOC_IN_RC(flt_reg));
 4621   match(RegF);
 4622   format %{ %}
 4623   interface(REG_INTER);
 4624 %}
 4625 
 4626 // Special Registers
 4627 
 4628 // Method Register
 4629 operand inline_cache_regP(iRegPdst reg) %{
 4630   constraint(ALLOC_IN_RC(r19_bits64_reg)); // inline_cache_reg
 4631   match(reg);
 4632   format %{ %}
 4633   interface(REG_INTER);
 4634 %}
 4635 
 4636 // Operands to remove register moves in unscaled mode.
 4637 // Match read/write registers with an EncodeP node if neither shift nor add is required.
 4638 operand iRegP2N(iRegPsrc reg) %{
 4639   predicate(false /* TODO: PPC port MatchDecodeNodes */ && CompressedOops::shift() == 0);
 4640   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4641   match(EncodeP reg);
 4642   format %{ "$reg" %}
 4643   interface(REG_INTER)
 4644 %}
 4645 
 4646 operand iRegN2P(iRegNsrc reg) %{
 4647   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4648   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4649   match(DecodeN reg);
 4650   format %{ "$reg" %}
 4651   interface(REG_INTER)
 4652 %}
 4653 
 4654 operand iRegN2P_klass(iRegNsrc reg) %{
 4655   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4656   constraint(ALLOC_IN_RC(bits32_reg_ro));
 4657   match(DecodeNKlass reg);
 4658   format %{ "$reg" %}
 4659   interface(REG_INTER)
 4660 %}
 4661 
 4662 //----------Complex Operands---------------------------------------------------
 4663 // Indirect Memory Reference
 4664 operand indirect(iRegPsrc reg) %{
 4665   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4666   match(reg);
 4667   op_cost(100);
 4668   format %{ "[$reg]" %}
 4669   interface(MEMORY_INTER) %{
 4670     base($reg);
 4671     index(0x0);
 4672     scale(0x0);
 4673     disp(0x0);
 4674   %}
 4675 %}
 4676 
 4677 // Indirect with Offset
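      // (The 16-bit offset corresponds to the signed displacement field of the
      // PPC D-form load/store instructions.)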
 4678 operand indOffset16(iRegPsrc reg, immL16 offset) %{
 4679   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4680   match(AddP reg offset);
 4681   op_cost(100);
 4682   format %{ "[$reg + $offset]" %}
 4683   interface(MEMORY_INTER) %{
 4684     base($reg);
 4685     index(0x0);
 4686     scale(0x0);
 4687     disp($offset);
 4688   %}
 4689 %}
 4690 
 4691 // Indirect with 4-aligned Offset
 4692 operand indOffset16Alg4(iRegPsrc reg, immL16Alg4 offset) %{
 4693   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4694   match(AddP reg offset);
 4695   op_cost(100);
 4696   format %{ "[$reg + $offset]" %}
 4697   interface(MEMORY_INTER) %{
 4698     base($reg);
 4699     index(0x0);
 4700     scale(0x0);
 4701     disp($offset);
 4702   %}
 4703 %}
 4704 
 4705 //----------Complex Operands for Compressed OOPs-------------------------------
 4706 // Compressed OOPs with narrow_oop_shift == 0.
 4707 
 4708 // Indirect Memory Reference, compressed OOP
 4709 operand indirectNarrow(iRegNsrc reg) %{
 4710   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4711   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4712   match(DecodeN reg);
 4713   op_cost(100);
 4714   format %{ "[$reg]" %}
 4715   interface(MEMORY_INTER) %{
 4716     base($reg);
 4717     index(0x0);
 4718     scale(0x0);
 4719     disp(0x0);
 4720   %}
 4721 %}
 4722 
 4723 operand indirectNarrow_klass(iRegNsrc reg) %{
 4724   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4725   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4726   match(DecodeNKlass reg);
 4727   op_cost(100);
 4728   format %{ "[$reg]" %}
 4729   interface(MEMORY_INTER) %{
 4730     base($reg);
 4731     index(0x0);
 4732     scale(0x0);
 4733     disp(0x0);
 4734   %}
 4735 %}
 4736 
 4737 // Indirect with Offset, compressed OOP
 4738 operand indOffset16Narrow(iRegNsrc reg, immL16 offset) %{
 4739   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4740   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4741   match(AddP (DecodeN reg) offset);
 4742   op_cost(100);
 4743   format %{ "[$reg + $offset]" %}
 4744   interface(MEMORY_INTER) %{
 4745     base($reg);
 4746     index(0x0);
 4747     scale(0x0);
 4748     disp($offset);
 4749   %}
 4750 %}
 4751 
 4752 operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{
 4753   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4754   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4755   match(AddP (DecodeNKlass reg) offset);
 4756   op_cost(100);
 4757   format %{ "[$reg + $offset]" %}
 4758   interface(MEMORY_INTER) %{
 4759     base($reg);
 4760     index(0x0);
 4761     scale(0x0);
 4762     disp($offset);
 4763   %}
 4764 %}
 4765 
 4766 // Indirect with 4-aligned Offset, compressed OOP
 4767 operand indOffset16NarrowAlg4(iRegNsrc reg, immL16Alg4 offset) %{
 4768   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
 4769   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4770   match(AddP (DecodeN reg) offset);
 4771   op_cost(100);
 4772   format %{ "[$reg + $offset]" %}
 4773   interface(MEMORY_INTER) %{
 4774     base($reg);
 4775     index(0x0);
 4776     scale(0x0);
 4777     disp($offset);
 4778   %}
 4779 %}
 4780 
 4781 operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{
 4782   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0);
 4783   constraint(ALLOC_IN_RC(bits64_reg_ro));
 4784   match(AddP (DecodeNKlass reg) offset);
 4785   op_cost(100);
 4786   format %{ "[$reg + $offset]" %}
 4787   interface(MEMORY_INTER) %{
 4788     base($reg);
 4789     index(0x0);
 4790     scale(0x0);
 4791     disp($offset);
 4792   %}
 4793 %}
 4794 
 4795 //----------Special Memory Operands--------------------------------------------
 4796 // Stack Slot Operand
 4797 //
 4798 // This operand is used for loading and storing temporary values on
 4799 // the stack where a match requires a value to flow through memory.
 4800 operand stackSlotI(sRegI reg) %{
 4801   constraint(ALLOC_IN_RC(stack_slots));
 4802   op_cost(100);
 4803   //match(RegI);
 4804   format %{ "[sp+$reg]" %}
 4805   interface(MEMORY_INTER) %{
 4806     base(0x1);   // R1_SP
 4807     index(0x0);
 4808     scale(0x0);
 4809     disp($reg);  // Stack Offset
 4810   %}
 4811 %}
 4812 
 4813 operand stackSlotL(sRegL reg) %{
 4814   constraint(ALLOC_IN_RC(stack_slots));
 4815   op_cost(100);
 4816   //match(RegL);
 4817   format %{ "[sp+$reg]" %}
 4818   interface(MEMORY_INTER) %{
 4819     base(0x1);   // R1_SP
 4820     index(0x0);
 4821     scale(0x0);
 4822     disp($reg);  // Stack Offset
 4823   %}
 4824 %}
 4825 
 4826 operand stackSlotP(sRegP reg) %{
 4827   constraint(ALLOC_IN_RC(stack_slots));
 4828   op_cost(100);
 4829   //match(RegP);
 4830   format %{ "[sp+$reg]" %}
 4831   interface(MEMORY_INTER) %{
 4832     base(0x1);   // R1_SP
 4833     index(0x0);
 4834     scale(0x0);
 4835     disp($reg);  // Stack Offset
 4836   %}
 4837 %}
 4838 
 4839 operand stackSlotF(sRegF reg) %{
 4840   constraint(ALLOC_IN_RC(stack_slots));
 4841   op_cost(100);
 4842   //match(RegF);
 4843   format %{ "[sp+$reg]" %}
 4844   interface(MEMORY_INTER) %{
 4845     base(0x1);   // R1_SP
 4846     index(0x0);
 4847     scale(0x0);
 4848     disp($reg);  // Stack Offset
 4849   %}
 4850 %}
 4851 
 4852 operand stackSlotD(sRegD reg) %{
 4853   constraint(ALLOC_IN_RC(stack_slots));
 4854   op_cost(100);
 4855   //match(RegD);
 4856   format %{ "[sp+$reg]" %}
 4857   interface(MEMORY_INTER) %{
 4858     base(0x1);   // R1_SP
 4859     index(0x0);
 4860     scale(0x0);
 4861     disp($reg);  // Stack Offset
 4862   %}
 4863 %}
 4864 
 4865 // Operands for expressing Control Flow
 4866 // NOTE: Label is a predefined operand which should not be redefined in
 4867 //       the AD file. It is generically handled within the ADLC.
 4868 
 4869 //----------Conditional Branch Operands----------------------------------------
 4870 // Comparison Op
 4871 //
 4872 // This is the operation of the comparison, and is limited to the
 4873 // following set of codes: L (<), LE (<=), G (>), GE (>=), E (==), NE
 4874 // (!=).
 4875 //
 4876 // Other attributes of the comparison, such as unsignedness, are specified
 4877 // by the comparison instruction that sets a condition code flags register.
 4878 // That result is represented by a flags operand whose subtype is appropriate
 4879 // to the unsignedness (etc.) of the comparison.
 4880 //
 4881 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4882 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4883 // by matching a specific subtype of Bool operand below.
 4884 
 4885 // When used for floating point comparisons: unordered same as less.
 4886 operand cmpOp() %{
 4887   match(Bool);
 4888   format %{ "" %}
 4889   interface(COND_INTER) %{
 4890                            // BO only encodes bit 4 of bcondCRbiIsX, as bits 1-3 are always '100'.
 4891                            //           BO          &  BI
 4892     equal(0xA);            // 10 10:   bcondCRbiIs1 & Condition::equal
 4893     not_equal(0x2);        // 00 10:   bcondCRbiIs0 & Condition::equal
 4894     less(0x8);             // 10 00:   bcondCRbiIs1 & Condition::less
 4895     greater_equal(0x0);    // 00 00:   bcondCRbiIs0 & Condition::less
 4896     less_equal(0x1);       // 00 01:   bcondCRbiIs0 & Condition::greater
 4897     greater(0x9);          // 10 01:   bcondCRbiIs1 & Condition::greater
 4898     overflow(0xB);         // 10 11:   bcondCRbiIs1 & Condition::summary_overflow
 4899     no_overflow(0x3);      // 00 11:   bcondCRbiIs0 & Condition::summary_overflow
 4900   %}
 4901 %}
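
      // Example: 'equal' is 0xA = '10 10', i.e. bcondCRbiIs1 (branch if the
      // tested CR bit is set) combined with Condition::equal; 'not_equal'
      // (0x2 = '00 10') tests the same CR bit but branches if it is clear.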
 4902 
 4903 //----------OPERAND CLASSES----------------------------------------------------
 4904 // Operand Classes are groups of operands that are used to simplify
 4905 // instruction definitions by not requiring the AD writer to specify
 4906 // separate instructions for every form of operand when the
 4907 // instruction accepts multiple operand types with the same basic
 4908 // encoding and format. The classic case of this is memory operands.
 4909 // Indirect is not included since its use is limited to Compare & Swap.
 4910 
 4911 opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass);
 4912 // Memory operand where offsets are 4-aligned. Required for ld, std (DS-form instructions omit the two low displacement bits).
 4913 opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass);
 4914 opclass indirectMemory(indirect, indirectNarrow);
 4915 
 4916 // Special opclass for I and ConvL2I.
 4917 opclass iRegIsrc_iRegL2Isrc(iRegIsrc, iRegL2Isrc);
 4918 
 4919 // Operand classes to match encode and decode. iRegN_P2N is only used
 4920 // for storeN. I have never seen an encode node elsewhere.
 4921 opclass iRegN_P2N(iRegNsrc, iRegP2N);
 4922 opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass);
 4923 
 4924 //----------PIPELINE-----------------------------------------------------------
 4925 
 4926 pipeline %{
 4927 
 4928 // See J.M.Tendler et al. "Power4 system microarchitecture", IBM
 4929 // J. Res. & Dev., No. 1, Jan. 2002.
 4930 
 4931 //----------ATTRIBUTES---------------------------------------------------------
 4932 attributes %{
 4933 
 4934   // Power4 instructions are of fixed length.
 4935   fixed_size_instructions;
 4936 
 4937   // TODO: if `bundle' means number of instructions fetched
 4938   // per cycle, this is 8. If `bundle' means Power4 `group', that is
 4939   // max instructions issued per cycle, this is 5.
 4940   max_instructions_per_bundle = 8;
 4941 
 4942   // A Power4 instruction is 4 bytes long.
 4943   instruction_unit_size = 4;
 4944 
 4945   // The Power4 processor fetches 64 bytes...
 4946   instruction_fetch_unit_size = 64;
 4947 
 4948   // ...in one line
 4949   instruction_fetch_units = 1;
 4950 
 4951   // Unused, list one so that array generated by adlc is not empty.
 4952   // Aix compiler chokes if _nop_count = 0.
 4953   nops(fxNop);
 4954 %}
 4955 
 4956 //----------RESOURCES----------------------------------------------------------
 4957 // Resources are the functional units available to the machine
 4958 resources(
 4959    PPC_BR,         // branch unit
 4960    PPC_CR,         // condition unit
 4961    PPC_FX1,        // integer arithmetic unit 1
 4962    PPC_FX2,        // integer arithmetic unit 2
 4963    PPC_LDST1,      // load/store unit 1
 4964    PPC_LDST2,      // load/store unit 2
 4965    PPC_FP1,        // float arithmetic unit 1
 4966    PPC_FP2,        // float arithmetic unit 2
 4967    PPC_LDST = PPC_LDST1 | PPC_LDST2,
 4968    PPC_FX = PPC_FX1 | PPC_FX2,
 4969    PPC_FP = PPC_FP1 | PPC_FP2
 4970  );
 4971 
 4972 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4973 // Pipeline Description specifies the stages in the machine's pipeline
 4974 pipe_desc(
 4975    // Power4 longest pipeline path
 4976    PPC_IF,   // instruction fetch
 4977    PPC_IC,
 4978    //PPC_BP, // branch prediction
 4979    PPC_D0,   // decode
 4980    PPC_D1,   // decode
 4981    PPC_D2,   // decode
 4982    PPC_D3,   // decode
 4983    PPC_Xfer1,
 4984    PPC_GD,   // group definition
 4985    PPC_MP,   // map
 4986    PPC_ISS,  // issue
 4987    PPC_RF,   // resource fetch
 4988    PPC_EX1,  // execute (all units)
 4989    PPC_EX2,  // execute (FP, LDST)
 4990    PPC_EX3,  // execute (FP, LDST)
 4991    PPC_EX4,  // execute (FP)
 4992    PPC_EX5,  // execute (FP)
 4993    PPC_EX6,  // execute (FP)
 4994    PPC_WB,   // write back
 4995    PPC_Xfer2,
 4996    PPC_CP
 4997  );
 4998 
 4999 //----------PIPELINE CLASSES---------------------------------------------------
 5000 // Pipeline Classes describe the stages in which input and output are
 5001 // referenced by the hardware pipeline.
 5002 
 5003 // Simple pipeline classes.
 5004 
 5005 // Default pipeline class.
 5006 pipe_class pipe_class_default() %{
 5007   single_instruction;
 5008   fixed_latency(2);
 5009 %}
 5010 
 5011 // Pipeline class for empty instructions.
 5012 pipe_class pipe_class_empty() %{
 5013   single_instruction;
 5014   fixed_latency(0);
 5015 %}
 5016 
 5017 // Pipeline class for compares.
 5018 pipe_class pipe_class_compare() %{
 5019   single_instruction;
 5020   fixed_latency(16);
 5021 %}
 5022 
 5023 // Pipeline class for traps.
 5024 pipe_class pipe_class_trap() %{
 5025   single_instruction;
 5026   fixed_latency(100);
 5027 %}
 5028 
 5029 // Pipeline class for memory operations.
 5030 pipe_class pipe_class_memory() %{
 5031   single_instruction;
 5032   fixed_latency(16);
 5033 %}
 5034 
 5035 // Pipeline class for call.
 5036 pipe_class pipe_class_call() %{
 5037   single_instruction;
 5038   fixed_latency(100);
 5039 %}
 5040 
 5041 // Define the class for the Nop node.
 5042 define %{
 5043    MachNop = pipe_class_default;
 5044 %}
 5045 
 5046 %}
 5047 
 5048 //----------INSTRUCTIONS-------------------------------------------------------
 5049 
 5050 // Naming of instructions:
 5051 //   opA_operB / opA_operB_operC:
 5052 //     Operation 'op' with one or two source operands 'oper'. Result
 5053 //     type is A, source operand types are B and C.
 5054 //     Iff A == B == C, B and C are left out.
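      //     For example, an instruction named addI_reg_imm16 would be an
      //     integer add of a register and a 16-bit immediate source operand.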
 5055 //
 5056 // The instructions are ordered according to the following scheme:
 5057 //  - loads
 5058 //  - load constants
 5059 //  - prefetch
 5060 //  - store
 5061 //  - encode/decode
 5062 //  - membar
 5063 //  - conditional moves
 5064 //  - compare & swap
 5065 //  - arithmetic and logic operations
 5066 //    * int: Add, Sub, Mul, Div, Mod
 5067 //    * int: lShift, arShift, urShift, rot
 5068 //    * float: Add, Sub, Mul, Div
 5069 //    * and, or, xor ...
 5070 //  - register moves: float <-> int, reg <-> stack, repl
 5071 //  - cast (high level type cast: XtoP, castPP, castII, not_null, etc.)
 5072 //  - conv (low level type cast requiring bit changes: sign extend, etc.)
 5073 //  - compares, range & zero checks.
 5074 //  - branches
 5075 //  - complex operations, intrinsics, min, max, replicate
 5076 //  - lock
 5077 //  - Calls
 5078 //
 5079 // If there are similar instructions with different types they are sorted:
 5080 // int before float
 5081 // small before big
 5082 // signed before unsigned
 5083 // e.g., loadS before loadUS before loadI before loadF.
 5084 
 5085 
 5086 //----------Load/Store Instructions--------------------------------------------
 5087 
 5088 //----------Load Instructions--------------------------------------------------
 5089 
 5090 // Converts byte to int.
 5091 // As convB2I_reg, but without match rule.  The match rule of convB2I_reg
 5092 // reuses the 'amount' operand, but adlc expects that operand specification
 5093 // and operands in match rule are equivalent.
 5094 instruct convB2I_reg_2(iRegIdst dst, iRegIsrc src) %{
 5095   effect(DEF dst, USE src);
 5096   format %{ "EXTSB   $dst, $src \t// byte->int" %}
 5097   size(4);
 5098   ins_encode %{
 5099     __ extsb($dst$$Register, $src$$Register);
 5100   %}
 5101   ins_pipe(pipe_class_default);
 5102 %}
 5103 
 5104 instruct loadUB_indirect(iRegIdst dst, indirectMemory mem) %{
 5105   // match-rule, false predicate
 5106   match(Set dst (LoadB mem));
 5107   predicate(false);
 5108 
 5109   format %{ "LBZ     $dst, $mem" %}
 5110   size(4);
 5111   ins_encode( enc_lbz(dst, mem) );
 5112   ins_pipe(pipe_class_memory);
 5113 %}
 5114 
 5115 instruct loadUB_indirect_ac(iRegIdst dst, indirectMemory mem) %{
 5116   // match-rule, false predicate
 5117   match(Set dst (LoadB mem));
 5118   predicate(false);
 5119 
 5120   format %{ "LBZ     $dst, $mem\n\t"
 5121             "TWI     $dst\n\t"
 5122             "ISYNC" %}
 5123   size(12);
 5124   ins_encode( enc_lbz_ac(dst, mem) );
 5125   ins_pipe(pipe_class_memory);
 5126 %}
 5127 
 5128 // Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
 5129 instruct loadB_indirect_Ex(iRegIdst dst, indirectMemory mem) %{
 5130   match(Set dst (LoadB mem));
 5131   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5132   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
 5133   expand %{
 5134     iRegIdst tmp;
 5135     loadUB_indirect(tmp, mem);
 5136     convB2I_reg_2(dst, tmp);
 5137   %}
 5138 %}
 5139 
 5140 instruct loadB_indirect_ac_Ex(iRegIdst dst, indirectMemory mem) %{
 5141   match(Set dst (LoadB mem));
 5142   ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
 5143   expand %{
 5144     iRegIdst tmp;
 5145     loadUB_indirect_ac(tmp, mem);
 5146     convB2I_reg_2(dst, tmp);
 5147   %}
 5148 %}
 5149 
 5150 instruct loadUB_indOffset16(iRegIdst dst, indOffset16 mem) %{
 5151   // match-rule, false predicate
 5152   match(Set dst (LoadB mem));
 5153   predicate(false);
 5154 
 5155   format %{ "LBZ     $dst, $mem" %}
 5156   size(4);
 5157   ins_encode( enc_lbz(dst, mem) );
 5158   ins_pipe(pipe_class_memory);
 5159 %}
 5160 
 5161 instruct loadUB_indOffset16_ac(iRegIdst dst, indOffset16 mem) %{
 5162   // match-rule, false predicate
 5163   match(Set dst (LoadB mem));
 5164   predicate(false);
 5165 
 5166   format %{ "LBZ     $dst, $mem\n\t"
 5167             "TWI     $dst\n\t"
 5168             "ISYNC" %}
 5169   size(12);
 5170   ins_encode( enc_lbz_ac(dst, mem) );
 5171   ins_pipe(pipe_class_memory);
 5172 %}
 5173 
 5174 // Load Byte (8bit signed). LoadB = LoadUB + ConvUB2B.
 5175 instruct loadB_indOffset16_Ex(iRegIdst dst, indOffset16 mem) %{
 5176   match(Set dst (LoadB mem));
 5177   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5178   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
 5179 
 5180   expand %{
 5181     iRegIdst tmp;
 5182     loadUB_indOffset16(tmp, mem);
 5183     convB2I_reg_2(dst, tmp);
 5184   %}
 5185 %}
 5186 
 5187 instruct loadB_indOffset16_ac_Ex(iRegIdst dst, indOffset16 mem) %{
 5188   match(Set dst (LoadB mem));
 5189   ins_cost(3*MEMORY_REF_COST + DEFAULT_COST);
 5190 
 5191   expand %{
 5192     iRegIdst tmp;
 5193     loadUB_indOffset16_ac(tmp, mem);
 5194     convB2I_reg_2(dst, tmp);
 5195   %}
 5196 %}
 5197 
 5198 // Load Unsigned Byte (8bit UNsigned) into an int reg.
 5199 instruct loadUB(iRegIdst dst, memory mem) %{
 5200   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5201   match(Set dst (LoadUB mem));
 5202   ins_cost(MEMORY_REF_COST);
 5203 
 5204   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int" %}
 5205   size(4);
 5206   ins_encode( enc_lbz(dst, mem) );
 5207   ins_pipe(pipe_class_memory);
 5208 %}
 5209 
 5210 // Load  Unsigned Byte (8bit UNsigned) acquire.
 5211 instruct loadUB_ac(iRegIdst dst, memory mem) %{
 5212   match(Set dst (LoadUB mem));
 5213   ins_cost(3*MEMORY_REF_COST);
 5214 
 5215   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to int, acquire\n\t"
 5216             "TWI     $dst\n\t"
 5217             "ISYNC" %}
 5218   size(12);
 5219   ins_encode( enc_lbz_ac(dst, mem) );
 5220   ins_pipe(pipe_class_memory);
 5221 %}
 5222 
 5223 // Load Unsigned Byte (8bit UNsigned) into a Long Register.
 5224 instruct loadUB2L(iRegLdst dst, memory mem) %{
 5225   match(Set dst (ConvI2L (LoadUB mem)));
 5226   predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
 5227   ins_cost(MEMORY_REF_COST);
 5228 
 5229   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long" %}
 5230   size(4);
 5231   ins_encode( enc_lbz(dst, mem) );
 5232   ins_pipe(pipe_class_memory);
 5233 %}
 5234 
 5235 instruct loadUB2L_ac(iRegLdst dst, memory mem) %{
 5236   match(Set dst (ConvI2L (LoadUB mem)));
 5237   ins_cost(3*MEMORY_REF_COST);
 5238 
 5239   format %{ "LBZ     $dst, $mem \t// byte, zero-extend to long, acquire\n\t"
 5240             "TWI     $dst\n\t"
 5241             "ISYNC" %}
 5242   size(12);
 5243   ins_encode( enc_lbz_ac(dst, mem) );
 5244   ins_pipe(pipe_class_memory);
 5245 %}
 5246 
 5247 // Load Short (16bit signed)
 5248 instruct loadS(iRegIdst dst, memory mem) %{
 5249   match(Set dst (LoadS mem));
 5250   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5251   ins_cost(MEMORY_REF_COST);
 5252 
 5253   format %{ "LHA     $dst, $mem" %}
 5254   size(4);
 5255   ins_encode %{
 5256     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5257     __ lha($dst$$Register, Idisp, $mem$$base$$Register);
 5258   %}
 5259   ins_pipe(pipe_class_memory);
 5260 %}
 5261 
 5262 // Load Short (16bit signed) acquire.
 5263 instruct loadS_ac(iRegIdst dst, memory mem) %{
 5264   match(Set dst (LoadS mem));
 5265   ins_cost(3*MEMORY_REF_COST);
 5266 
 5267   format %{ "LHA     $dst, $mem \t// acquire\n\t"
 5268             "TWI     $dst\n\t"
 5269             "ISYNC" %}
 5270   size(12);
 5271   ins_encode %{
 5272     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5273     __ lha($dst$$Register, Idisp, $mem$$base$$Register);
 5274     __ twi_0($dst$$Register);
 5275     __ isync();
 5276   %}
 5277   ins_pipe(pipe_class_memory);
 5278 %}
 5279 
 5280 // Load Char (16bit unsigned)
 5281 instruct loadUS(iRegIdst dst, memory mem) %{
 5282   match(Set dst (LoadUS mem));
 5283   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5284   ins_cost(MEMORY_REF_COST);
 5285 
 5286   format %{ "LHZ     $dst, $mem" %}
 5287   size(4);
 5288   ins_encode( enc_lhz(dst, mem) );
 5289   ins_pipe(pipe_class_memory);
 5290 %}
 5291 
 5292 // Load Char (16bit unsigned) acquire.
 5293 instruct loadUS_ac(iRegIdst dst, memory mem) %{
 5294   match(Set dst (LoadUS mem));
 5295   ins_cost(3*MEMORY_REF_COST);
 5296 
 5297   format %{ "LHZ     $dst, $mem \t// acquire\n\t"
 5298             "TWI     $dst\n\t"
 5299             "ISYNC" %}
 5300   size(12);
 5301   ins_encode( enc_lhz_ac(dst, mem) );
 5302   ins_pipe(pipe_class_memory);
 5303 %}
 5304 
 5305 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
 5306 instruct loadUS2L(iRegLdst dst, memory mem) %{
 5307   match(Set dst (ConvI2L (LoadUS mem)));
 5308   predicate(_kids[0]->_leaf->as_Load()->is_unordered() || followed_by_acquire(_kids[0]->_leaf));
 5309   ins_cost(MEMORY_REF_COST);
 5310 
 5311   format %{ "LHZ     $dst, $mem \t// short, zero-extend to long" %}
 5312   size(4);
 5313   ins_encode( enc_lhz(dst, mem) );
 5314   ins_pipe(pipe_class_memory);
 5315 %}
 5316 
 5317 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register acquire.
 5318 instruct loadUS2L_ac(iRegLdst dst, memory mem) %{
 5319   match(Set dst (ConvI2L (LoadUS mem)));
 5320   ins_cost(3*MEMORY_REF_COST);
 5321 
 5322   format %{ "LHZ     $dst, $mem \t// short, zero-extend to long, acquire\n\t"
 5323             "TWI     $dst\n\t"
 5324             "ISYNC" %}
 5325   size(12);
 5326   ins_encode( enc_lhz_ac(dst, mem) );
 5327   ins_pipe(pipe_class_memory);
 5328 %}
 5329 
 5330 // Load Integer.
 5331 instruct loadI(iRegIdst dst, memory mem) %{
 5332   match(Set dst (LoadI mem));
 5333   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5334   ins_cost(MEMORY_REF_COST);
 5335 
 5336   format %{ "LWZ     $dst, $mem" %}
 5337   size(4);
 5338   ins_encode( enc_lwz(dst, mem) );
 5339   ins_pipe(pipe_class_memory);
 5340 %}
 5341 
 5342 // Load Integer acquire.
 5343 instruct loadI_ac(iRegIdst dst, memory mem) %{
 5344   match(Set dst (LoadI mem));
 5345   ins_cost(3*MEMORY_REF_COST);
 5346 
 5347   format %{ "LWZ     $dst, $mem \t// load acquire\n\t"
 5348             "TWI     $dst\n\t"
 5349             "ISYNC" %}
 5350   size(12);
 5351   ins_encode( enc_lwz_ac(dst, mem) );
 5352   ins_pipe(pipe_class_memory);
 5353 %}
 5354 
 5355 // Match loading integer and casting it to unsigned int in
 5356 // long register.
 5357 // LoadI + ConvI2L + AndL 0xffffffff.
 5358 instruct loadUI2L(iRegLdst dst, memory mem, immL_32bits mask) %{
 5359   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5360   predicate(_kids[0]->_kids[0]->_leaf->as_Load()->is_unordered());
 5361   ins_cost(MEMORY_REF_COST);
 5362 
 5363   format %{ "LWZ     $dst, $mem \t// zero-extend to long" %}
 5364   size(4);
 5365   ins_encode( enc_lwz(dst, mem) );
 5366   ins_pipe(pipe_class_memory);
 5367 %}
 5368 
 5369 // Match loading integer and casting it to long.
 5370 instruct loadI2L(iRegLdst dst, memoryAlg4 mem) %{
 5371   match(Set dst (ConvI2L (LoadI mem)));
 5372   predicate(_kids[0]->_leaf->as_Load()->is_unordered());
 5373   ins_cost(MEMORY_REF_COST);
 5374 
 5375   format %{ "LWA     $dst, $mem \t// loadI2L" %}
 5376   size(4);
 5377   ins_encode %{
 5378     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5379     __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
 5380   %}
 5381   ins_pipe(pipe_class_memory);
 5382 %}
 5383 
 5384 // Match loading integer and casting it to long - acquire.
 5385 instruct loadI2L_ac(iRegLdst dst, memoryAlg4 mem) %{
 5386   match(Set dst (ConvI2L (LoadI mem)));
 5387   ins_cost(3*MEMORY_REF_COST);
 5388 
 5389   format %{ "LWA     $dst, $mem \t// loadI2L acquire\n\t"
 5390             "TWI     $dst\n\t"
 5391             "ISYNC" %}
 5392   size(12);
 5393   ins_encode %{
 5394     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5395     __ lwa($dst$$Register, Idisp, $mem$$base$$Register);
 5396     __ twi_0($dst$$Register);
 5397     __ isync();
 5398   %}
 5399   ins_pipe(pipe_class_memory);
 5400 %}
 5401 
 5402 // Load Long - aligned
 5403 instruct loadL(iRegLdst dst, memoryAlg4 mem) %{
 5404   match(Set dst (LoadL mem));
 5405   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5406   ins_cost(MEMORY_REF_COST);
 5407 
 5408   format %{ "LD      $dst, $mem \t// long" %}
 5409   size(4);
 5410   ins_encode( enc_ld(dst, mem) );
 5411   ins_pipe(pipe_class_memory);
 5412 %}
 5413 
 5414 // Load Long - aligned acquire.
 5415 instruct loadL_ac(iRegLdst dst, memoryAlg4 mem) %{
 5416   match(Set dst (LoadL mem));
 5417   ins_cost(3*MEMORY_REF_COST);
 5418 
 5419   format %{ "LD      $dst, $mem \t// long acquire\n\t"
 5420             "TWI     $dst\n\t"
 5421             "ISYNC" %}
 5422   size(12);
 5423   ins_encode( enc_ld_ac(dst, mem) );
 5424   ins_pipe(pipe_class_memory);
 5425 %}
 5426 
 5427 // Load Long - UNaligned
 5428 instruct loadL_unaligned(iRegLdst dst, memoryAlg4 mem) %{
 5429   match(Set dst (LoadL_unaligned mem));
 5430   // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
 5431   ins_cost(MEMORY_REF_COST);
 5432 
 5433   format %{ "LD      $dst, $mem \t// unaligned long" %}
 5434   size(4);
 5435   ins_encode( enc_ld(dst, mem) );
 5436   ins_pipe(pipe_class_memory);
 5437 %}
 5438 
 5439 // Load nodes for superwords
 5440 
 5441 // Load Aligned Packed Byte
 5442 instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
 5443   predicate(n->as_LoadVector()->memory_size() == 8);
 5444   match(Set dst (LoadVector mem));
 5445   ins_cost(MEMORY_REF_COST);
 5446 
 5447   format %{ "LD      $dst, $mem \t// load 8-byte Vector" %}
 5448   size(4);
 5449   ins_encode( enc_ld(dst, mem) );
 5450   ins_pipe(pipe_class_memory);
 5451 %}
 5452 
 5453 // Load Aligned Packed Byte
 5454 instruct loadV16(vecX dst, indirect mem) %{
 5455   predicate(n->as_LoadVector()->memory_size() == 16);
 5456   match(Set dst (LoadVector mem));
 5457   ins_cost(MEMORY_REF_COST);
 5458 
 5459   format %{ "LXVD2X      $dst, $mem \t// load 16-byte Vector" %}
 5460   size(4);
 5461   ins_encode %{
 5462     __ lxvd2x($dst$$VectorSRegister, $mem$$Register);
 5463   %}
 5464   ins_pipe(pipe_class_default);
 5465 %}
 5466 
 5467 // Load Range, range = array length (=jint)
 5468 instruct loadRange(iRegIdst dst, memory mem) %{
 5469   match(Set dst (LoadRange mem));
 5470   ins_cost(MEMORY_REF_COST);
 5471 
 5472   format %{ "LWZ     $dst, $mem \t// range" %}
 5473   size(4);
 5474   ins_encode( enc_lwz(dst, mem) );
 5475   ins_pipe(pipe_class_memory);
 5476 %}
 5477 
 5478 // Load Compressed Pointer
 5479 instruct loadN(iRegNdst dst, memory mem) %{
 5480   match(Set dst (LoadN mem));
 5481   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5482   ins_cost(MEMORY_REF_COST);
 5483 
 5484   format %{ "LWZ     $dst, $mem \t// load compressed ptr" %}
 5485   size(4);
 5486   ins_encode( enc_lwz(dst, mem) );
 5487   ins_pipe(pipe_class_memory);
 5488 %}
 5489 
 5490 // Load Compressed Pointer acquire.
 5491 instruct loadN_ac(iRegNdst dst, memory mem) %{
 5492   match(Set dst (LoadN mem));
 5493   ins_cost(3*MEMORY_REF_COST);
 5494 
 5495   format %{ "LWZ     $dst, $mem \t// load acquire compressed ptr\n\t"
 5496             "TWI     $dst\n\t"
 5497             "ISYNC" %}
 5498   size(12);
 5499   ins_encode( enc_lwz_ac(dst, mem) );
 5500   ins_pipe(pipe_class_memory);
 5501 %}
 5502 
 5503 // Load Compressed Pointer and decode it if narrow_oop_shift == 0.
 5504 instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{
 5505   match(Set dst (DecodeN (LoadN mem)));
 5506   predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0);
 5507   ins_cost(MEMORY_REF_COST);
 5508 
 5509   format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
 5510   size(4);
 5511   ins_encode( enc_lwz(dst, mem) );
 5512   ins_pipe(pipe_class_memory);
 5513 %}
 5514 
 5515 instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{
 5516   match(Set dst (DecodeNKlass (LoadNKlass mem)));
 5517   predicate(CompressedKlassPointers::base() == NULL && CompressedKlassPointers::shift() == 0 &&
 5518             _kids[0]->_leaf->as_Load()->is_unordered());
 5519   ins_cost(MEMORY_REF_COST);
 5520 
 5521   format %{ "LWZ     $dst, $mem \t// DecodeNKlass (unscaled)" %}
 5522   size(4);
 5523   ins_encode( enc_lwz(dst, mem) );
 5524   ins_pipe(pipe_class_memory);
 5525 %}
 5526 
 5527 // Load Pointer
 5528 instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
 5529   match(Set dst (LoadP mem));
 5530   predicate((n->as_Load()->is_unordered() || followed_by_acquire(n)) && n->as_Load()->barrier_data() == 0);
 5531   ins_cost(MEMORY_REF_COST);
 5532 
 5533   format %{ "LD      $dst, $mem \t// ptr" %}
 5534   size(4);
 5535   ins_encode( enc_ld(dst, mem) );
 5536   ins_pipe(pipe_class_memory);
 5537 %}
 5538 
 5539 // Load Pointer acquire.
 5540 instruct loadP_ac(iRegPdst dst, memoryAlg4 mem) %{
 5541   match(Set dst (LoadP mem));
 5542   ins_cost(3*MEMORY_REF_COST);
 5543 
 5544   predicate(n->as_Load()->barrier_data() == 0);
 5545 
 5546   format %{ "LD      $dst, $mem \t// ptr acquire\n\t"
 5547             "TWI     $dst\n\t"
 5548             "ISYNC" %}
 5549   size(12);
 5550   ins_encode( enc_ld_ac(dst, mem) );
 5551   ins_pipe(pipe_class_memory);
 5552 %}
 5553 
 5554 // LoadP + CastP2L
 5555 instruct loadP2X(iRegLdst dst, memoryAlg4 mem) %{
 5556   match(Set dst (CastP2X (LoadP mem)));
 5557   predicate(_kids[0]->_leaf->as_Load()->is_unordered() && _kids[0]->_leaf->as_Load()->barrier_data() == 0);
 5558   ins_cost(MEMORY_REF_COST);
 5559 
 5560   format %{ "LD      $dst, $mem \t// ptr + p2x" %}
 5561   size(4);
 5562   ins_encode( enc_ld(dst, mem) );
 5563   ins_pipe(pipe_class_memory);
 5564 %}
 5565 
 5566 // Load compressed klass pointer.
 5567 instruct loadNKlass(iRegNdst dst, memory mem) %{
 5568   match(Set dst (LoadNKlass mem));
 5569   ins_cost(MEMORY_REF_COST);
 5570 
 5571   format %{ "LWZ     $dst, $mem \t// compressed klass ptr" %}
 5572   size(4);
 5573   ins_encode( enc_lwz(dst, mem) );
 5574   ins_pipe(pipe_class_memory);
 5575 %}
 5576 
 5577 // Load Klass Pointer
 5578 instruct loadKlass(iRegPdst dst, memoryAlg4 mem) %{
 5579   match(Set dst (LoadKlass mem));
 5580   ins_cost(MEMORY_REF_COST);
 5581 
 5582   format %{ "LD      $dst, $mem \t// klass ptr" %}
 5583   size(4);
 5584   ins_encode( enc_ld(dst, mem) );
 5585   ins_pipe(pipe_class_memory);
 5586 %}
 5587 
 5588 // Load Float
 5589 instruct loadF(regF dst, memory mem) %{
 5590   match(Set dst (LoadF mem));
 5591   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5592   ins_cost(MEMORY_REF_COST);
 5593 
 5594   format %{ "LFS     $dst, $mem" %}
 5595   size(4);
 5596   ins_encode %{
 5597     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5598     __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5599   %}
 5600   ins_pipe(pipe_class_memory);
 5601 %}
 5602 
 5603 // Load Float acquire.
 5604 instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{
 5605   match(Set dst (LoadF mem));
 5606   effect(TEMP cr0);
 5607   ins_cost(3*MEMORY_REF_COST);
 5608 
 5609   format %{ "LFS     $dst, $mem \t// acquire\n\t"
 5610             "FCMPU   cr0, $dst, $dst\n\t"
 5611             "BNE     cr0, next\n"
 5612             "next:\n\t"
 5613             "ISYNC" %}
 5614   size(16);
 5615   ins_encode %{
 5616     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5617     Label next;
 5618     __ lfs($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5619     __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
 5620     __ bne(CCR0, next);
 5621     __ bind(next);
 5622     __ isync();
 5623   %}
 5624   ins_pipe(pipe_class_memory);
 5625 %}
 5626 
 5627 // Load Double - aligned
 5628 instruct loadD(regD dst, memory mem) %{
 5629   match(Set dst (LoadD mem));
 5630   predicate(n->as_Load()->is_unordered() || followed_by_acquire(n));
 5631   ins_cost(MEMORY_REF_COST);
 5632 
 5633   format %{ "LFD     $dst, $mem" %}
 5634   size(4);
 5635   ins_encode( enc_lfd(dst, mem) );
 5636   ins_pipe(pipe_class_memory);
 5637 %}
 5638 
 5639 // Load Double - aligned acquire.
 5640 instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{
 5641   match(Set dst (LoadD mem));
 5642   effect(TEMP cr0);
 5643   ins_cost(3*MEMORY_REF_COST);
 5644 
 5645   format %{ "LFD     $dst, $mem \t// acquire\n\t"
 5646             "FCMPU   cr0, $dst, $dst\n\t"
 5647             "BNE     cr0, next\n"
 5648             "next:\n\t"
 5649             "ISYNC" %}
 5650   size(16);
 5651   ins_encode %{
 5652     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 5653     Label next;
 5654     __ lfd($dst$$FloatRegister, Idisp, $mem$$base$$Register);
 5655     __ fcmpu(CCR0, $dst$$FloatRegister, $dst$$FloatRegister);
 5656     __ bne(CCR0, next);
 5657     __ bind(next);
 5658     __ isync();
 5659   %}
 5660   ins_pipe(pipe_class_memory);
 5661 %}
 5662 
 5663 // Load Double - UNaligned
 5664 instruct loadD_unaligned(regD dst, memory mem) %{
 5665   match(Set dst (LoadD_unaligned mem));
 5666   // predicate(...) // Unaligned_ac is not needed (and wouldn't make sense).
 5667   ins_cost(MEMORY_REF_COST);
 5668 
 5669   format %{ "LFD     $dst, $mem" %}
 5670   size(4);
 5671   ins_encode( enc_lfd(dst, mem) );
 5672   ins_pipe(pipe_class_memory);
 5673 %}
 5674 
 5675 //----------Constants--------------------------------------------------------
 5676 
 5677 // Load MachConstantTableBase: add hi offset to global toc.
 5678 // TODO: Handle hidden register r29 in bundler!
 5679 instruct loadToc_hi(iRegLdst dst) %{
 5680   effect(DEF dst);
 5681   ins_cost(DEFAULT_COST);
 5682 
 5683   format %{ "ADDIS   $dst, R29, DISP.hi \t// load TOC hi" %}
 5684   size(4);
 5685   ins_encode %{
 5686     __ calculate_address_from_global_toc_hi16only($dst$$Register, __ method_toc());
 5687   %}
 5688   ins_pipe(pipe_class_default);
 5689 %}
 5690 
 5691 // Load MachConstantTableBase: add lo offset to global toc.
 5692 instruct loadToc_lo(iRegLdst dst, iRegLdst src) %{
 5693   effect(DEF dst, USE src);
 5694   ins_cost(DEFAULT_COST);
 5695 
 5696   format %{ "ADDI    $dst, $src, DISP.lo \t// load TOC lo" %}
 5697   size(4);
 5698   ins_encode %{
 5699     __ calculate_address_from_global_toc_lo16only($dst$$Register, __ method_toc());
 5700   %}
 5701   ins_pipe(pipe_class_default);
 5702 %}
 5703 
 5704 // Load 16-bit integer constant 0xssss????
 5705 instruct loadConI16(iRegIdst dst, immI16 src) %{
 5706   match(Set dst src);
 5707 
 5708   format %{ "LI      $dst, $src" %}
 5709   size(4);
 5710   ins_encode %{
 5711     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
 5712   %}
 5713   ins_pipe(pipe_class_default);
 5714 %}
 5715 
 5716 // Load integer constant 0x????0000
 5717 instruct loadConIhi16(iRegIdst dst, immIhi16 src) %{
 5718   match(Set dst src);
 5719   ins_cost(DEFAULT_COST);
 5720 
 5721   format %{ "LIS     $dst, $src.hi" %}
 5722   size(4);
 5723   ins_encode %{
 5724     // Lis sign extends 16-bit src then shifts it 16 bits to the left.
 5725     __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
 5726   %}
 5727   ins_pipe(pipe_class_default);
 5728 %}
 5729 
 5730 // Part 2 of loading a 32 bit constant: hi16 is in src1 (properly shifted
 5731 // and sign extended); this adds the low 16 bits.
 5732 instruct loadConI32_lo16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 5733   // no match-rule, false predicate
 5734   effect(DEF dst, USE src1, USE src2);
 5735   predicate(false);
 5736 
 5737   format %{ "ORI     $dst, $src1.hi, $src2.lo" %}
 5738   size(4);
 5739   ins_encode %{
 5740     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 5741   %}
 5742   ins_pipe(pipe_class_default);
 5743 %}
 5744 
 5745 instruct loadConI32(iRegIdst dst, immI32 src) %{
 5746   match(Set dst src);
 5747   // This macro is valid only on Power10 and newer, but adding the following predicate here
 5748   // caused a build error, so we comment it out for now.
 5749   // predicate(PowerArchitecturePPC64 >= 10);
 5750   ins_cost(DEFAULT_COST+1);
 5751 
 5752   format %{ "PLI     $dst, $src" %}
 5753   size(8);
 5754   ins_encode %{
 5755     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 5756     __ pli($dst$$Register, $src$$constant);
 5757   %}
 5758   ins_pipe(pipe_class_default);
 5759   ins_alignment(2);
 5760 %}
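
      // Note: PLI is an 8-byte prefixed instruction and prefixed instructions
      // must not cross a 64-byte boundary; the assert above rejects a start in
      // the last word of a 64-byte block, and ins_alignment(2) requests 8-byte
      // alignment so that this cannot happen.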
 5761 
 5762 instruct loadConI_Ex(iRegIdst dst, immI src) %{
 5763   match(Set dst src);
 5764   ins_cost(DEFAULT_COST*2);
 5765 
 5766   expand %{
 5767     // Would like to use $src$$constant.
 5768     immI16 srcLo %{ _opnds[1]->constant() %}
 5769     // srcHi can be 0000 if srcLo sign-extends to a negative number.
 5770     immIhi16 srcHi %{ _opnds[1]->constant() %}
 5771     iRegIdst tmpI;
 5772     loadConIhi16(tmpI, srcHi);
 5773     loadConI32_lo16(dst, tmpI, srcLo);
 5774   %}
 5775 %}
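
      // For illustration, loading 0x12345678 via this expansion emits:
      //   LIS  tmp, 0x1234      // tmp = 0x12340000
      //   ORI  dst, tmp, 0x5678 // dst = 0x12345678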
 5776 
 5777 // No constant pool entries required.
 5778 instruct loadConL16(iRegLdst dst, immL16 src) %{
 5779   match(Set dst src);
 5780 
 5781   format %{ "LI      $dst, $src \t// long" %}
 5782   size(4);
 5783   ins_encode %{
 5784     __ li($dst$$Register, (int)((short) ($src$$constant & 0xFFFF)));
 5785   %}
 5786   ins_pipe(pipe_class_default);
 5787 %}
 5788 
 5789 // Load long constant 0xssssssss????0000
 5790 instruct loadConL32hi16(iRegLdst dst, immL32hi16 src) %{
 5791   match(Set dst src);
 5792   ins_cost(DEFAULT_COST);
 5793 
 5794   format %{ "LIS     $dst, $src.hi \t// long" %}
 5795   size(4);
 5796   ins_encode %{
 5797     __ lis($dst$$Register, (int)((short)(($src$$constant & 0xFFFF0000) >> 16)));
 5798   %}
 5799   ins_pipe(pipe_class_default);
 5800 %}
 5801 
 5802 // To load a 32 bit constant: merge lower 16 bits into already loaded
 5803 // high 16 bits.
 5804 instruct loadConL32_lo16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 5805   // no match-rule, false predicate
 5806   effect(DEF dst, USE src1, USE src2);
 5807   predicate(false);
 5808 
 5809   format %{ "ORI     $dst, $src1, $src2.lo" %}
 5810   size(4);
 5811   ins_encode %{
 5812     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 5813   %}
 5814   ins_pipe(pipe_class_default);
 5815 %}
 5816 
 5817 // Load 32-bit long constant
 5818 instruct loadConL32_Ex(iRegLdst dst, immL32 src) %{
 5819   match(Set dst src);
 5820   ins_cost(DEFAULT_COST*2);
 5821 
 5822   expand %{
 5823     // Would like to use $src$$constant.
 5824     immL16     srcLo %{ _opnds[1]->constant() /*& 0x0000FFFFL */%}
 5825     // srcHi can be 0000 if srcLo sign-extends to a negative number.
 5826     immL32hi16 srcHi %{ _opnds[1]->constant() /*& 0xFFFF0000L */%}
 5827     iRegLdst tmpL;
 5828     loadConL32hi16(tmpL, srcHi);
 5829     loadConL32_lo16(dst, tmpL, srcLo);
 5830   %}
 5831 %}
 5832 
 5833 // Load 34-bit long constant using prefixed addi. No constant pool entries required.
 5834 instruct loadConL34(iRegLdst dst, immL34 src) %{
 5835   match(Set dst src);
 5836   // This macro is valid only on Power10 and newer, but adding the following predicate here
 5837   // caused a build error, so we comment it out for now.
 5838   // predicate(PowerArchitecturePPC64 >= 10);
 5839   ins_cost(DEFAULT_COST+1);
 5840 
 5841   format %{ "PLI     $dst, $src \t// long" %}
 5842   size(8);
 5843   ins_encode %{
 5844     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 5845     __ pli($dst$$Register, $src$$constant);
 5846   %}
 5847   ins_pipe(pipe_class_default);
 5848   ins_alignment(2);
 5849 %}
 5850 
 5851 // Load long constant 0x????000000000000.
 5852 instruct loadConLhighest16_Ex(iRegLdst dst, immLhighest16 src) %{
 5853   match(Set dst src);
 5854   ins_cost(DEFAULT_COST);
 5855 
 5856   expand %{
 5857     immL32hi16 srcHi %{ _opnds[1]->constant() >> 32 /*& 0xFFFF0000L */%}
 5858     immI shift32 %{ 32 %}
 5859     iRegLdst tmpL;
 5860     loadConL32hi16(tmpL, srcHi);
 5861     lshiftL_regL_immI(dst, tmpL, shift32);
 5862   %}
 5863 %}
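
      // For illustration, loading 0xABCD000000000000 emits LIS (tmp becomes
      // 0xFFFFFFFFABCD0000 after sign extension) followed by a left shift by
      // 32, which discards the sign-extension bits.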
 5864 
 5865 // Expand node for constant pool load: small offset.
 5866 instruct loadConL(iRegLdst dst, immL src, iRegLdst toc) %{
 5867   effect(DEF dst, USE src, USE toc);
 5868   ins_cost(MEMORY_REF_COST);
 5869 
 5870   ins_num_consts(1);
 5871   // Needed so that CallDynamicJavaDirect can compute the address of this
 5872   // instruction for relocation.
 5873   ins_field_cbuf_insts_offset(int);
 5874 
 5875   format %{ "LD      $dst, offset, $toc \t// load long $src from TOC" %}
 5876   size(4);
 5877   ins_encode( enc_load_long_constL(dst, src, toc) );
 5878   ins_pipe(pipe_class_memory);
 5879 %}
 5880 
 5881 // Expand node for constant pool load: large offset.
 5882 instruct loadConL_hi(iRegLdst dst, immL src, iRegLdst toc) %{
 5883   effect(DEF dst, USE src, USE toc);
 5884   predicate(false);
 5885 
 5886   ins_num_consts(1);
 5887   ins_field_const_toc_offset(int);
 5888   // Needed so that CallDynamicJavaDirect can compute the address of this
 5889   // instruction for relocation.
 5890   ins_field_cbuf_insts_offset(int);
 5891 
 5892   format %{ "ADDIS   $dst, $toc, offset \t// load long $src from TOC (hi)" %}
 5893   size(4);
 5894   ins_encode( enc_load_long_constL_hi(dst, toc, src) );
 5895   ins_pipe(pipe_class_default);
 5896 %}
 5897 
 5898 // Expand node for constant pool load: large offset.
 5899 // No constant pool entries required.
 5900 instruct loadConL_lo(iRegLdst dst, immL src, iRegLdst base) %{
 5901   effect(DEF dst, USE src, USE base);
 5902   predicate(false);
 5903 
 5904   ins_field_const_toc_offset_hi_node(loadConL_hiNode*);
 5905 
 5906   format %{ "LD      $dst, offset, $base \t// load long $src from TOC (lo)" %}
 5907   size(4);
 5908   ins_encode %{
 5909     int offset = ra_->C->output()->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
 5910     __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
 5911   %}
 5912   ins_pipe(pipe_class_memory);
 5913 %}
 5914 
 5915 // Load long constant from constant table. Expanded in case an
 5916 // offset larger than 16 bits is needed.
 5917 // Adlc adds toc node MachConstantTableBase.
 5918 instruct loadConL_Ex(iRegLdst dst, immL src) %{
 5919   match(Set dst src);
 5920   ins_cost(MEMORY_REF_COST);
 5921 
 5922   format %{ "LD      $dst, offset, $constanttablebase\t// load long $src from table, postalloc expanded" %}
 5923   // We cannot inline the enc_class for the expand because it does not support constanttablebase.
 5924   postalloc_expand( postalloc_expand_load_long_constant(dst, src, constanttablebase) );
 5925 %}
 5926 
 5927 // Load NULL as compressed oop.
 5928 instruct loadConN0(iRegNdst dst, immN_0 src) %{
 5929   match(Set dst src);
 5930   ins_cost(DEFAULT_COST);
 5931 
 5932   format %{ "LI      $dst, $src \t// compressed ptr" %}
 5933   size(4);
 5934   ins_encode %{
 5935     __ li($dst$$Register, 0);
 5936   %}
 5937   ins_pipe(pipe_class_default);
 5938 %}
 5939 
 5940 // Load hi part of compressed oop constant.
 5941 instruct loadConN_hi(iRegNdst dst, immN src) %{
 5942   effect(DEF dst, USE src);
 5943   ins_cost(DEFAULT_COST);
 5944 
 5945   format %{ "LIS     $dst, $src \t// narrow oop hi" %}
 5946   size(4);
 5947   ins_encode %{
 5948     __ lis($dst$$Register, (int)(short)(($src$$constant >> 16) & 0xffff));
 5949   %}
 5950   ins_pipe(pipe_class_default);
 5951 %}
 5952 
 5953 // Add lo part of compressed oop constant to already loaded hi part.
 5954 instruct loadConN_lo(iRegNdst dst, iRegNsrc src1, immN src2) %{
 5955   effect(DEF dst, USE src1, USE src2);
 5956   ins_cost(DEFAULT_COST);
 5957 
 5958   format %{ "ORI     $dst, $src1, $src2 \t// narrow oop lo" %}
 5959   size(4);
 5960   ins_encode %{
 5961     assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
 5962     int oop_index = __ oop_recorder()->find_index((jobject)$src2$$constant);
 5963     RelocationHolder rspec = oop_Relocation::spec(oop_index);
 5964     __ relocate(rspec, 1);
 5965     __ ori($dst$$Register, $src1$$Register, $src2$$constant & 0xffff);
 5966   %}
 5967   ins_pipe(pipe_class_default);
 5968 %}
 5969 
 5970 instruct rldicl(iRegLdst dst, iRegLsrc src, immI16 shift, immI16 mask_begin) %{
 5971   effect(DEF dst, USE src, USE shift, USE mask_begin);
 5972 
 5973   size(4);
 5974   ins_encode %{
 5975     __ rldicl($dst$$Register, $src$$Register, $shift$$constant, $mask_begin$$constant);
 5976   %}
 5977   ins_pipe(pipe_class_default);
 5978 %}
 5979 
 5980 // Needed to postalloc expand loadConN: ConN is loaded as ConI
 5981 // leaving the upper 32 bits with sign-extension bits.
 5982 // This clears these bits: dst = src & 0xFFFFFFFF.
 5983 // TODO: Eventually call this maskN_regN_FFFFFFFF.
 5984 instruct clearMs32b(iRegNdst dst, iRegNsrc src) %{
 5985   effect(DEF dst, USE src);
 5986   predicate(false);
 5987 
 5988   format %{ "MASK    $dst, $src, 0xFFFFFFFF" %} // mask
 5989   size(4);
 5990   ins_encode %{
 5991     __ clrldi($dst$$Register, $src$$Register, 0x20);
 5992   %}
 5993   ins_pipe(pipe_class_default);
 5994 %}
 5995 
 5996 // Optimize DecodeN for disjoint base.
 5997 // Load base of compressed oops into a register
 5998 instruct loadBase(iRegLdst dst) %{
 5999   effect(DEF dst);
 6000 
 6001   format %{ "LoadConst $dst, heapbase" %}
 6002   ins_encode %{
 6003     __ load_const_optimized($dst$$Register, CompressedOops::base(), R0);
 6004   %}
 6005   ins_pipe(pipe_class_default);
 6006 %}
 6007 
 6008 // Loading ConN must be postalloc expanded so that edges between
 6009 // the nodes are safe. They may not interfere with a safepoint.
 6010 // GL TODO: This needs three instructions: better put this into the constant pool.
 6011 instruct loadConN_Ex(iRegNdst dst, immN src) %{
 6012   match(Set dst src);
 6013   ins_cost(DEFAULT_COST*2);
 6014 
 6015   format %{ "LoadN   $dst, $src \t// postalloc expanded" %} // mask
 6016   postalloc_expand %{
 6017     MachNode *m1 = new loadConN_hiNode();
 6018     MachNode *m2 = new loadConN_loNode();
 6019     MachNode *m3 = new clearMs32bNode();
 6020     m1->add_req(NULL);
 6021     m2->add_req(NULL, m1);
 6022     m3->add_req(NULL, m2);
 6023     m1->_opnds[0] = op_dst;
 6024     m1->_opnds[1] = op_src;
 6025     m2->_opnds[0] = op_dst;
 6026     m2->_opnds[1] = op_dst;
 6027     m2->_opnds[2] = op_src;
 6028     m3->_opnds[0] = op_dst;
 6029     m3->_opnds[1] = op_dst;
 6030     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6031     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6032     ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6033     nodes->push(m1);
 6034     nodes->push(m2);
 6035     nodes->push(m3);
 6036   %}
 6037 %}
 6038 
 6039 // We have seen a safepoint between the hi and lo parts, and this node was handled
 6040 // as an oop. Therefore this needs a match rule so that build_oop_map knows this is
 6041 // not a narrow oop.
 6042 instruct loadConNKlass_hi(iRegNdst dst, immNKlass_NM src) %{
 6043   match(Set dst src);
 6044   effect(DEF dst, USE src);
 6045   ins_cost(DEFAULT_COST);
 6046 
 6047   format %{ "LIS     $dst, $src \t// narrow klass hi" %}
 6048   size(4);
 6049   ins_encode %{
 6050     intptr_t Csrc = CompressedKlassPointers::encode((Klass *)$src$$constant);
 6051     __ lis($dst$$Register, (int)(short)((Csrc >> 16) & 0xffff));
 6052   %}
 6053   ins_pipe(pipe_class_default);
 6054 %}
 6055 
// Like loadConNKlass_hi, this must be recognized as a narrow klass, not an oop!
 6057 instruct loadConNKlass_mask(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
 6058   match(Set dst src1);
 6059   effect(TEMP src2);
 6060   ins_cost(DEFAULT_COST);
 6061 
 6062   format %{ "MASK    $dst, $src2, 0xFFFFFFFF" %} // mask
 6063   size(4);
 6064   ins_encode %{
 6065     __ clrldi($dst$$Register, $src2$$Register, 0x20);
 6066   %}
 6067   ins_pipe(pipe_class_default);
 6068 %}
 6069 
 6070 // This needs a match rule so that build_oop_map knows this is
 6071 // not a narrow oop.
 6072 instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{
 6073   match(Set dst src1);
 6074   effect(TEMP src2);
 6075   ins_cost(DEFAULT_COST);
 6076 
 6077   format %{ "ORI     $dst, $src1, $src2 \t// narrow klass lo" %}
 6078   size(4);
 6079   ins_encode %{
 6080     intptr_t Csrc = CompressedKlassPointers::encode((Klass *)$src1$$constant);
 6081     assert(__ oop_recorder() != NULL, "this assembler needs an OopRecorder");
 6082     int klass_index = __ oop_recorder()->find_index((Klass *)$src1$$constant);
 6083     RelocationHolder rspec = metadata_Relocation::spec(klass_index);
 6084 
 6085     __ relocate(rspec, 1);
 6086     __ ori($dst$$Register, $src2$$Register, Csrc & 0xffff);
 6087   %}
 6088   ins_pipe(pipe_class_default);
 6089 %}
 6090 
 6091 // Loading ConNKlass must be postalloc expanded so that edges between
 6092 // the nodes are safe. They may not interfere with a safepoint.
 6093 instruct loadConNKlass_Ex(iRegNdst dst, immNKlass src) %{
 6094   match(Set dst src);
 6095   ins_cost(DEFAULT_COST*2);
 6096 
  format %{ "LoadN   $dst, $src \t// postalloc expanded" %}
 6098   postalloc_expand %{
 6099     // Load high bits into register. Sign extended.
 6100     MachNode *m1 = new loadConNKlass_hiNode();
 6101     m1->add_req(NULL);
 6102     m1->_opnds[0] = op_dst;
 6103     m1->_opnds[1] = op_src;
 6104     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6105     nodes->push(m1);
 6106 
 6107     MachNode *m2 = m1;
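    // If the encoded klass value fits in 31 bits, the hi part is not
    // sign-extended, so the masking node can be skipped and the lo node
    // chains directly to m1.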
 6108     if (!Assembler::is_uimm((jlong)CompressedKlassPointers::encode((Klass *)op_src->constant()), 31)) {
 6109       // Value might be 1-extended. Mask out these bits.
 6110       m2 = new loadConNKlass_maskNode();
 6111       m2->add_req(NULL, m1);
 6112       m2->_opnds[0] = op_dst;
 6113       m2->_opnds[1] = op_src;
 6114       m2->_opnds[2] = op_dst;
 6115       ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6116       nodes->push(m2);
 6117     }
 6118 
 6119     MachNode *m3 = new loadConNKlass_loNode();
 6120     m3->add_req(NULL, m2);
 6121     m3->_opnds[0] = op_dst;
 6122     m3->_opnds[1] = op_src;
 6123     m3->_opnds[2] = op_dst;
 6124     ra_->set_pair(m3->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6125     nodes->push(m3);
 6126   %}
 6127 %}
 6128 
 6129 // 0x1 is used in object initialization (initial object header).
 6130 // No constant pool entries required.
 6131 instruct loadConP0or1(iRegPdst dst, immP_0or1 src) %{
 6132   match(Set dst src);
 6133 
 6134   format %{ "LI      $dst, $src \t// ptr" %}
 6135   size(4);
 6136   ins_encode %{
 6137     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
 6138   %}
 6139   ins_pipe(pipe_class_default);
 6140 %}
 6141 
 6142 // Expand node for constant pool load: small offset.
// The match rule is needed to generate the correct bottom_type();
// however, this node should never match. A predicate cannot be used
// because ADLC forbids predicates on chain rules, and the higher
// cost alone does not prevent matching. For that reason the
// operand immP_NM with predicate(false) is used.
 6148 instruct loadConP(iRegPdst dst, immP_NM src, iRegLdst toc) %{
 6149   match(Set dst src);
 6150   effect(TEMP toc);
 6151 
 6152   ins_num_consts(1);
 6153 
 6154   format %{ "LD      $dst, offset, $toc \t// load ptr $src from TOC" %}
 6155   size(4);
 6156   ins_encode( enc_load_long_constP(dst, src, toc) );
 6157   ins_pipe(pipe_class_memory);
 6158 %}
 6159 
 6160 // Expand node for constant pool load: large offset.
 6161 instruct loadConP_hi(iRegPdst dst, immP_NM src, iRegLdst toc) %{
 6162   effect(DEF dst, USE src, USE toc);
 6163   predicate(false);
 6164 
 6165   ins_num_consts(1);
 6166   ins_field_const_toc_offset(int);
 6167 
 6168   format %{ "ADDIS   $dst, $toc, offset \t// load ptr $src from TOC (hi)" %}
 6169   size(4);
 6170   ins_encode( enc_load_long_constP_hi(dst, src, toc) );
 6171   ins_pipe(pipe_class_default);
 6172 %}
 6173 
 6174 // Expand node for constant pool load: large offset.
 6175 instruct loadConP_lo(iRegPdst dst, immP_NM src, iRegLdst base) %{
 6176   match(Set dst src);
 6177   effect(TEMP base);
 6178 
 6179   ins_field_const_toc_offset_hi_node(loadConP_hiNode*);
 6180 
 6181   format %{ "LD      $dst, offset, $base \t// load ptr $src from TOC (lo)" %}
 6182   size(4);
 6183   ins_encode %{
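    // When only measuring code size (scratch emission), the TOC offset of the
    // companion hi node is not available yet, so 0 is used as a placeholder.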
 6184     int offset = ra_->C->output()->in_scratch_emit_size() ? 0 : _const_toc_offset_hi_node->_const_toc_offset;
 6185     __ ld($dst$$Register, MacroAssembler::largeoffset_si16_si16_lo(offset), $base$$Register);
 6186   %}
 6187   ins_pipe(pipe_class_memory);
 6188 %}
 6189 
 6190 // Load pointer constant from constant table. Expand in case an
 6191 // offset > 16 bit is needed.
 6192 // Adlc adds toc node MachConstantTableBase.
 6193 instruct loadConP_Ex(iRegPdst dst, immP src) %{
 6194   match(Set dst src);
 6195   ins_cost(MEMORY_REF_COST);
 6196 
 6197   // This rule does not use "expand" because then
 6198   // the result type is not known to be an Oop.  An ADLC
 6199   // enhancement will be needed to make that work - not worth it!
 6200 
 6201   // If this instruction rematerializes, it prolongs the live range
 6202   // of the toc node, causing illegal graphs.
 6203   // assert(edge_from_to(_reg_node[reg_lo],def)) fails in verify_good_schedule().
 6204   ins_cannot_rematerialize(true);
 6205 
 6206   format %{ "LD    $dst, offset, $constanttablebase \t//  load ptr $src from table, postalloc expanded" %}
 6207   postalloc_expand( postalloc_expand_load_ptr_constant(dst, src, constanttablebase) );
 6208 %}
 6209 
 6210 // Expand node for constant pool load: small offset.
 6211 instruct loadConF(regF dst, immF src, iRegLdst toc) %{
 6212   effect(DEF dst, USE src, USE toc);
 6213   ins_cost(MEMORY_REF_COST);
 6214 
 6215   ins_num_consts(1);
 6216 
 6217   format %{ "LFS     $dst, offset, $toc \t// load float $src from TOC" %}
 6218   size(4);
 6219   ins_encode %{
 6220     address float_address = __ float_constant($src$$constant);
 6221     if (float_address == NULL) {
 6222       ciEnv::current()->record_out_of_memory_failure();
 6223       return;
 6224     }
 6225     __ lfs($dst$$FloatRegister, __ offset_to_method_toc(float_address), $toc$$Register);
 6226   %}
 6227   ins_pipe(pipe_class_memory);
 6228 %}
 6229 
 6230 // Expand node for constant pool load: large offset.
 6231 instruct loadConFComp(regF dst, immF src, iRegLdst toc) %{
 6232   effect(DEF dst, USE src, USE toc);
 6233   ins_cost(MEMORY_REF_COST);
 6234 
 6235   ins_num_consts(1);
 6236 
 6237   format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
 6238             "LFS     $dst, offset_lo, $toc \t// load float $src from TOC (hi/lo)\n\t"
 6239             "ADDIS   $toc, $toc, -offset_hi"%}
 6240   size(12);
 6241   ins_encode %{
 6242     FloatRegister Rdst    = $dst$$FloatRegister;
 6243     Register Rtoc         = $toc$$Register;
 6244     address float_address = __ float_constant($src$$constant);
 6245     if (float_address == NULL) {
 6246       ciEnv::current()->record_out_of_memory_failure();
 6247       return;
 6248     }
 6249     int offset            = __ offset_to_method_toc(float_address);
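    // Split the offset so that hi * 0x10000 + lo == offset with lo in the
    // signed 16-bit range (the rounding when computing hi guarantees this).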
 6250     int hi = (offset + (1<<15))>>16;
 6251     int lo = offset - hi * (1<<16);
 6252 
 6253     __ addis(Rtoc, Rtoc, hi);
 6254     __ lfs(Rdst, lo, Rtoc);
 6255     __ addis(Rtoc, Rtoc, -hi);
 6256   %}
 6257   ins_pipe(pipe_class_memory);
 6258 %}
 6259 
 6260 // Adlc adds toc node MachConstantTableBase.
 6261 instruct loadConF_Ex(regF dst, immF src) %{
 6262   match(Set dst src);
 6263   ins_cost(MEMORY_REF_COST);
 6264 
 6265   // See loadConP.
 6266   ins_cannot_rematerialize(true);
 6267 
 6268   format %{ "LFS     $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
 6269   postalloc_expand( postalloc_expand_load_float_constant(dst, src, constanttablebase) );
 6270 %}
 6271 
 6272 // Expand node for constant pool load: small offset.
 6273 instruct loadConD(regD dst, immD src, iRegLdst toc) %{
 6274   effect(DEF dst, USE src, USE toc);
 6275   ins_cost(MEMORY_REF_COST);
 6276 
 6277   ins_num_consts(1);
 6278 
 6279   format %{ "LFD     $dst, offset, $toc \t// load double $src from TOC" %}
 6280   size(4);
 6281   ins_encode %{
 6282     address float_address = __ double_constant($src$$constant);
 6283     if (float_address == NULL) {
 6284       ciEnv::current()->record_out_of_memory_failure();
 6285       return;
 6286     }
 6287     int offset =  __ offset_to_method_toc(float_address);
 6288     __ lfd($dst$$FloatRegister, offset, $toc$$Register);
 6289   %}
 6290   ins_pipe(pipe_class_memory);
 6291 %}
 6292 
 6293 // Expand node for constant pool load: large offset.
 6294 instruct loadConDComp(regD dst, immD src, iRegLdst toc) %{
 6295   effect(DEF dst, USE src, USE toc);
 6296   ins_cost(MEMORY_REF_COST);
 6297 
 6298   ins_num_consts(1);
 6299 
 6300   format %{ "ADDIS   $toc, $toc, offset_hi\n\t"
 6301             "LFD     $dst, offset_lo, $toc \t// load double $src from TOC (hi/lo)\n\t"
 6302             "ADDIS   $toc, $toc, -offset_hi" %}
 6303   size(12);
 6304   ins_encode %{
 6305     FloatRegister Rdst    = $dst$$FloatRegister;
 6306     Register      Rtoc    = $toc$$Register;
 6307     address float_address = __ double_constant($src$$constant);
 6308     if (float_address == NULL) {
 6309       ciEnv::current()->record_out_of_memory_failure();
 6310       return;
 6311     }
 6312     int offset = __ offset_to_method_toc(float_address);
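    // Split the offset so that hi * 0x10000 + lo == offset with lo in the
    // signed 16-bit range (the rounding when computing hi guarantees this).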
 6313     int hi = (offset + (1<<15))>>16;
 6314     int lo = offset - hi * (1<<16);
 6315 
 6316     __ addis(Rtoc, Rtoc, hi);
 6317     __ lfd(Rdst, lo, Rtoc);
 6318     __ addis(Rtoc, Rtoc, -hi);
 6319   %}
 6320   ins_pipe(pipe_class_memory);
 6321 %}
 6322 
 6323 // Adlc adds toc node MachConstantTableBase.
 6324 instruct loadConD_Ex(regD dst, immD src) %{
 6325   match(Set dst src);
 6326   ins_cost(MEMORY_REF_COST);
 6327 
 6328   // See loadConP.
 6329   ins_cannot_rematerialize(true);
 6330 
 6331   format %{ "ConD    $dst, offset, $constanttablebase \t// load $src from table, postalloc expanded" %}
 6332   postalloc_expand( postalloc_expand_load_double_constant(dst, src, constanttablebase) );
 6333 %}
 6334 
 6335 // Prefetch instructions.
 6336 // Must be safe to execute with invalid address (cannot fault).
 6337 
 6338 // Special prefetch versions which use the dcbz instruction.
 6339 instruct prefetch_alloc_zero(indirectMemory mem, iRegLsrc src) %{
 6340   match(PrefetchAllocation (AddP mem src));
 6341   predicate(AllocatePrefetchStyle == 3);
 6342   ins_cost(MEMORY_REF_COST);
 6343 
 6344   format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many with zero" %}
 6345   size(4);
 6346   ins_encode %{
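    // dcbz zeroes the whole cache block, establishing it in the data cache
    // without fetching its previous contents from memory.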
 6347     __ dcbz($src$$Register, $mem$$base$$Register);
 6348   %}
 6349   ins_pipe(pipe_class_memory);
 6350 %}
 6351 
 6352 instruct prefetch_alloc_zero_no_offset(indirectMemory mem) %{
 6353   match(PrefetchAllocation mem);
 6354   predicate(AllocatePrefetchStyle == 3);
 6355   ins_cost(MEMORY_REF_COST);
 6356 
 6357   format %{ "PREFETCH $mem, 2 \t// Prefetch write-many with zero" %}
 6358   size(4);
 6359   ins_encode %{
 6360     __ dcbz($mem$$base$$Register);
 6361   %}
 6362   ins_pipe(pipe_class_memory);
 6363 %}
 6364 
 6365 instruct prefetch_alloc(indirectMemory mem, iRegLsrc src) %{
 6366   match(PrefetchAllocation (AddP mem src));
 6367   predicate(AllocatePrefetchStyle != 3);
 6368   ins_cost(MEMORY_REF_COST);
 6369 
 6370   format %{ "PREFETCH $mem, 2, $src \t// Prefetch write-many" %}
 6371   size(4);
 6372   ins_encode %{
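    // dcbtst hints that the cache block will soon be stored to
    // (data cache block touch for store).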
 6373     __ dcbtst($src$$Register, $mem$$base$$Register);
 6374   %}
 6375   ins_pipe(pipe_class_memory);
 6376 %}
 6377 
 6378 instruct prefetch_alloc_no_offset(indirectMemory mem) %{
 6379   match(PrefetchAllocation mem);
 6380   predicate(AllocatePrefetchStyle != 3);
 6381   ins_cost(MEMORY_REF_COST);
 6382 
 6383   format %{ "PREFETCH $mem, 2 \t// Prefetch write-many" %}
 6384   size(4);
 6385   ins_encode %{
 6386     __ dcbtst($mem$$base$$Register);
 6387   %}
 6388   ins_pipe(pipe_class_memory);
 6389 %}
 6390 
 6391 //----------Store Instructions-------------------------------------------------
 6392 
 6393 // Store Byte
 6394 instruct storeB(memory mem, iRegIsrc src) %{
 6395   match(Set mem (StoreB mem src));
 6396   ins_cost(MEMORY_REF_COST);
 6397 
 6398   format %{ "STB     $src, $mem \t// byte" %}
 6399   size(4);
 6400   ins_encode %{
 6401     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 6402     __ stb($src$$Register, Idisp, $mem$$base$$Register);
 6403   %}
 6404   ins_pipe(pipe_class_memory);
 6405 %}
 6406 
 6407 // Store Char/Short
 6408 instruct storeC(memory mem, iRegIsrc src) %{
 6409   match(Set mem (StoreC mem src));
 6410   ins_cost(MEMORY_REF_COST);
 6411 
 6412   format %{ "STH     $src, $mem \t// short" %}
 6413   size(4);
 6414   ins_encode %{
 6415     int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_);
 6416     __ sth($src$$Register, Idisp, $mem$$base$$Register);
 6417   %}
 6418   ins_pipe(pipe_class_memory);
 6419 %}
 6420 
 6421 // Store Integer
 6422 instruct storeI(memory mem, iRegIsrc src) %{
 6423   match(Set mem (StoreI mem src));
 6424   ins_cost(MEMORY_REF_COST);
 6425 
 6426   format %{ "STW     $src, $mem" %}
 6427   size(4);
 6428   ins_encode( enc_stw(src, mem) );
 6429   ins_pipe(pipe_class_memory);
 6430 %}
 6431 
 6432 // ConvL2I + StoreI.
 6433 instruct storeI_convL2I(memory mem, iRegLsrc src) %{
 6434   match(Set mem (StoreI mem (ConvL2I src)));
 6435   ins_cost(MEMORY_REF_COST);
 6436 
 6437   format %{ "STW     l2i($src), $mem" %}
 6438   size(4);
 6439   ins_encode( enc_stw(src, mem) );
 6440   ins_pipe(pipe_class_memory);
 6441 %}
 6442 
 6443 // Store Long
 6444 instruct storeL(memoryAlg4 mem, iRegLsrc src) %{
 6445   match(Set mem (StoreL mem src));
 6446   ins_cost(MEMORY_REF_COST);
 6447 
 6448   format %{ "STD     $src, $mem \t// long" %}
 6449   size(4);
 6450   ins_encode( enc_std(src, mem) );
 6451   ins_pipe(pipe_class_memory);
 6452 %}
 6453 
 6454 // Store super word nodes.
 6455 
 6456 // Store Aligned Packed Byte long register to memory
 6457 instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
 6458   predicate(n->as_StoreVector()->memory_size() == 8);
 6459   match(Set mem (StoreVector mem src));
 6460   ins_cost(MEMORY_REF_COST);
 6461 
  format %{ "STD     $src, $mem \t// packed8B" %}
 6463   size(4);
 6464   ins_encode( enc_std(src, mem) );
 6465   ins_pipe(pipe_class_memory);
 6466 %}
 6467 
 6468 // Store Packed Byte long register to memory
 6469 instruct storeV16(indirect mem, vecX src) %{
 6470   predicate(n->as_StoreVector()->memory_size() == 16);
 6471   match(Set mem (StoreVector mem src));
 6472   ins_cost(MEMORY_REF_COST);
 6473 
  format %{ "STXVD2X     $src, $mem \t// store 16-byte Vector" %}
 6475   size(4);
 6476   ins_encode %{
 6477     __ stxvd2x($src$$VectorSRegister, $mem$$Register);
 6478   %}
 6479   ins_pipe(pipe_class_default);
 6480 %}
 6481 
// Reinterpret: only one vector size is used, either L or X.
 6483 instruct reinterpretL(iRegLdst dst) %{
 6484   match(Set dst (VectorReinterpret dst));
 6485   ins_cost(0);
 6486   format %{ "reinterpret $dst" %}
 6487   ins_encode( /*empty*/ );
 6488   ins_pipe(pipe_class_empty);
 6489 %}
 6490 
 6491 instruct reinterpretX(vecX dst) %{
 6492   match(Set dst (VectorReinterpret dst));
 6493   ins_cost(0);
 6494   format %{ "reinterpret $dst" %}
 6495   ins_encode( /*empty*/ );
 6496   ins_pipe(pipe_class_empty);
 6497 %}
 6498 
 6499 // Store Compressed Oop
 6500 instruct storeN(memory dst, iRegN_P2N src) %{
 6501   match(Set dst (StoreN dst src));
 6502   ins_cost(MEMORY_REF_COST);
 6503 
 6504   format %{ "STW     $src, $dst \t// compressed oop" %}
 6505   size(4);
 6506   ins_encode( enc_stw(src, dst) );
 6507   ins_pipe(pipe_class_memory);
 6508 %}
 6509 
// Store Compressed Klass
 6511 instruct storeNKlass(memory dst, iRegN_P2N src) %{
 6512   match(Set dst (StoreNKlass dst src));
 6513   ins_cost(MEMORY_REF_COST);
 6514 
 6515   format %{ "STW     $src, $dst \t// compressed klass" %}
 6516   size(4);
 6517   ins_encode( enc_stw(src, dst) );
 6518   ins_pipe(pipe_class_memory);
 6519 %}
 6520 
 6521 // Store Pointer
 6522 instruct storeP(memoryAlg4 dst, iRegPsrc src) %{
 6523   match(Set dst (StoreP dst src));
 6524   ins_cost(MEMORY_REF_COST);
 6525 
 6526   format %{ "STD     $src, $dst \t// ptr" %}
 6527   size(4);
 6528   ins_encode( enc_std(src, dst) );
 6529   ins_pipe(pipe_class_memory);
 6530 %}
 6531 
 6532 // Store Float
 6533 instruct storeF(memory mem, regF src) %{
 6534   match(Set mem (StoreF mem src));
 6535   ins_cost(MEMORY_REF_COST);
 6536 
 6537   format %{ "STFS    $src, $mem" %}
 6538   size(4);
 6539   ins_encode( enc_stfs(src, mem) );
 6540   ins_pipe(pipe_class_memory);
 6541 %}
 6542 
 6543 // Store Double
 6544 instruct storeD(memory mem, regD src) %{
 6545   match(Set mem (StoreD mem src));
 6546   ins_cost(MEMORY_REF_COST);
 6547 
 6548   format %{ "STFD    $src, $mem" %}
 6549   size(4);
 6550   ins_encode( enc_stfd(src, mem) );
 6551   ins_pipe(pipe_class_memory);
 6552 %}
 6553 
 6554 //----------Store Instructions With Zeros--------------------------------------
 6555 
 6556 instruct storeCM(memory mem, immI_0 zero) %{
 6557   match(Set mem (StoreCM mem zero));
 6558   ins_cost(MEMORY_REF_COST);
 6559 
 6560   format %{ "STB     #0, $mem \t// CMS card-mark byte store" %}
 6561   size(8);
 6562   ins_encode %{
 6563     __ li(R0, 0);
 6564     // No release barrier: Oops are allowed to get visible after marking.
 6565     guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias");
 6566     __ stb(R0, $mem$$disp, $mem$$base$$Register);
 6567   %}
 6568   ins_pipe(pipe_class_memory);
 6569 %}
 6570 
 6571 // Convert oop pointer into compressed form.
 6572 
 6573 // Nodes for postalloc expand.
 6574 
 6575 // Shift node for expand.
 6576 instruct encodeP_shift(iRegNdst dst, iRegNsrc src) %{
 6577   // The match rule is needed to make it a 'MachTypeNode'!
 6578   match(Set dst (EncodeP src));
 6579   predicate(false);
 6580 
 6581   format %{ "SRDI    $dst, $src, 3 \t// encode" %}
 6582   size(4);
 6583   ins_encode %{
 6584     __ srdi($dst$$Register, $src$$Register, CompressedOops::shift() & 0x3f);
 6585   %}
 6586   ins_pipe(pipe_class_default);
 6587 %}
 6588 
// Subtract-base node for expand.
 6590 instruct encodeP_sub(iRegPdst dst, iRegPdst src) %{
 6591   // The match rule is needed to make it a 'MachTypeNode'!
 6592   match(Set dst (EncodeP src));
 6593   predicate(false);
 6594 
 6595   format %{ "SUB     $dst, $src, oop_base \t// encode" %}
 6596   ins_encode %{
 6597     __ sub_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6598   %}
 6599   ins_pipe(pipe_class_default);
 6600 %}
 6601 
 6602 // Conditional sub base.
 6603 instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6604   // The match rule is needed to make it a 'MachTypeNode'!
 6605   match(Set dst (EncodeP (Binary crx src1)));
 6606   predicate(false);
 6607 
 6608   format %{ "BEQ     $crx, done\n\t"
 6609             "SUB     $dst, $src1, heapbase \t// encode: subtract base if != NULL\n"
 6610             "done:" %}
 6611   ins_encode %{
 6612     Label done;
 6613     __ beq($crx$$CondRegister, done);
 6614     __ sub_const_optimized($dst$$Register, $src1$$Register, CompressedOops::base(), R0);
 6615     __ bind(done);
 6616   %}
 6617   ins_pipe(pipe_class_default);
 6618 %}
 6619 
// Power7 can use the isel instruction.
 6621 instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6622   // The match rule is needed to make it a 'MachTypeNode'!
 6623   match(Set dst (EncodeP (Binary crx src1)));
 6624   predicate(false);
 6625 
 6626   format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// encode: preserve 0" %}
 6627   size(4);
 6628   ins_encode %{
 6629     // This is a Power7 instruction for which no machine description exists.
 6630     __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
 6631   %}
 6632   ins_pipe(pipe_class_default);
 6633 %}
 6634 
 6635 // Disjoint narrow oop base.
 6636 instruct encodeP_Disjoint(iRegNdst dst, iRegPsrc src) %{
 6637   match(Set dst (EncodeP src));
 6638   predicate(CompressedOops::base_disjoint());
 6639 
 6640   format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with disjoint base" %}
 6641   size(4);
 6642   ins_encode %{
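    // Rotate left by (64 - shift), i.e. right by shift, and keep only the
    // low 32 bits. With a disjoint base the base bits lie outside this
    // field, so no subtraction is needed.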
 6643     __ rldicl($dst$$Register, $src$$Register, 64-CompressedOops::shift(), 32);
 6644   %}
 6645   ins_pipe(pipe_class_default);
 6646 %}
 6647 
 6648 // shift != 0, base != 0
 6649 instruct encodeP_Ex(iRegNdst dst, flagsReg crx, iRegPsrc src) %{
 6650   match(Set dst (EncodeP src));
 6651   effect(TEMP crx);
 6652   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull &&
 6653             CompressedOops::shift() != 0 &&
 6654             CompressedOops::base_overlaps());
 6655 
 6656   format %{ "EncodeP $dst, $crx, $src \t// postalloc expanded" %}
 6657   postalloc_expand( postalloc_expand_encode_oop(dst, src, crx));
 6658 %}
 6659 
 6660 // shift != 0, base != 0
 6661 instruct encodeP_not_null_Ex(iRegNdst dst, iRegPsrc src) %{
 6662   match(Set dst (EncodeP src));
 6663   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull &&
 6664             CompressedOops::shift() != 0 &&
 6665             CompressedOops::base_overlaps());
 6666 
 6667   format %{ "EncodeP $dst, $src\t// $src != Null, postalloc expanded" %}
 6668   postalloc_expand( postalloc_expand_encode_oop_not_null(dst, src) );
 6669 %}
 6670 
 6671 // shift != 0, base == 0
 6672 // TODO: This is the same as encodeP_shift. Merge!
 6673 instruct encodeP_not_null_base_null(iRegNdst dst, iRegPsrc src) %{
 6674   match(Set dst (EncodeP src));
 6675   predicate(CompressedOops::shift() != 0 &&
            CompressedOops::base() == 0);
 6677 
 6678   format %{ "SRDI    $dst, $src, #3 \t// encodeP, $src != NULL" %}
 6679   size(4);
 6680   ins_encode %{
 6681     __ srdi($dst$$Register, $src$$Register, CompressedOops::shift() & 0x3f);
 6682   %}
 6683   ins_pipe(pipe_class_default);
 6684 %}
 6685 
 6686 // Compressed OOPs with narrow_oop_shift == 0.
 6687 // shift == 0, base == 0
 6688 instruct encodeP_narrow_oop_shift_0(iRegNdst dst, iRegPsrc src) %{
 6689   match(Set dst (EncodeP src));
 6690   predicate(CompressedOops::shift() == 0);
 6691 
 6692   format %{ "MR      $dst, $src \t// Ptr->Narrow" %}
 6693   // variable size, 0 or 4.
 6694   ins_encode %{
 6695     __ mr_if_needed($dst$$Register, $src$$Register);
 6696   %}
 6697   ins_pipe(pipe_class_default);
 6698 %}
 6699 
 6700 // Decode nodes.
 6701 
 6702 // Shift node for expand.
 6703 instruct decodeN_shift(iRegPdst dst, iRegPsrc src) %{
 6704   // The match rule is needed to make it a 'MachTypeNode'!
 6705   match(Set dst (DecodeN src));
 6706   predicate(false);
 6707 
 6708   format %{ "SLDI    $dst, $src, #3 \t// DecodeN" %}
 6709   size(4);
 6710   ins_encode %{
 6711     __ sldi($dst$$Register, $src$$Register, CompressedOops::shift());
 6712   %}
 6713   ins_pipe(pipe_class_default);
 6714 %}
 6715 
 6716 // Add node for expand.
 6717 instruct decodeN_add(iRegPdst dst, iRegPdst src) %{
 6718   // The match rule is needed to make it a 'MachTypeNode'!
 6719   match(Set dst (DecodeN src));
 6720   predicate(false);
 6721 
 6722   format %{ "ADD     $dst, $src, heapbase \t// DecodeN, add oop base" %}
 6723   ins_encode %{
 6724     __ add_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6725   %}
 6726   ins_pipe(pipe_class_default);
 6727 %}
 6728 
// Conditional add base for expand.
 6730 instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{
 6731   // The match rule is needed to make it a 'MachTypeNode'!
 6732   // NOTICE that the rule is nonsense - we just have to make sure that:
 6733   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
 6734   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
 6735   match(Set dst (DecodeN (Binary crx src)));
 6736   predicate(false);
 6737 
 6738   format %{ "BEQ     $crx, done\n\t"
 6739             "ADD     $dst, $src, heapbase \t// DecodeN: add oop base if $src != NULL\n"
 6740             "done:" %}
 6741   ins_encode %{
 6742     Label done;
 6743     __ beq($crx$$CondRegister, done);
 6744     __ add_const_optimized($dst$$Register, $src$$Register, CompressedOops::base(), R0);
 6745     __ bind(done);
 6746   %}
 6747   ins_pipe(pipe_class_default);
 6748 %}
 6749 
 6750 instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{
 6751   // The match rule is needed to make it a 'MachTypeNode'!
 6752   // NOTICE that the rule is nonsense - we just have to make sure that:
 6753   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
 6754   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
 6755   match(Set dst (DecodeN (Binary crx src1)));
 6756   predicate(false);
 6757 
 6758   format %{ "CMOVE   $dst, $crx eq, 0, $src1 \t// decode: preserve 0" %}
 6759   size(4);
 6760   ins_encode %{
 6761     // This is a Power7 instruction for which no machine description exists.
 6762     __ isel_0($dst$$Register, $crx$$CondRegister, Assembler::equal, $src1$$Register);
 6763   %}
 6764   ins_pipe(pipe_class_default);
 6765 %}
 6766 
 6767 //  shift != 0, base != 0
 6768 instruct decodeN_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 6769   match(Set dst (DecodeN src));
 6770   predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
 6771              n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
 6772             CompressedOops::shift() != 0 &&
 6773             CompressedOops::base() != 0);
 6774   ins_cost(4 * DEFAULT_COST); // Should be more expensive than decodeN_Disjoint_isel_Ex.
 6775   effect(TEMP crx);
 6776 
 6777   format %{ "DecodeN $dst, $src \t// Kills $crx, postalloc expanded" %}
 6778   postalloc_expand( postalloc_expand_decode_oop(dst, src, crx) );
 6779 %}
 6780 
 6781 // shift != 0, base == 0
 6782 instruct decodeN_nullBase(iRegPdst dst, iRegNsrc src) %{
 6783   match(Set dst (DecodeN src));
 6784   predicate(CompressedOops::shift() != 0 &&
 6785             CompressedOops::base() == 0);
 6786 
 6787   format %{ "SLDI    $dst, $src, #3 \t// DecodeN (zerobased)" %}
 6788   size(4);
 6789   ins_encode %{
 6790     __ sldi($dst$$Register, $src$$Register, CompressedOops::shift());
 6791   %}
 6792   ins_pipe(pipe_class_default);
 6793 %}
 6794 
 6795 // Optimize DecodeN for disjoint base.
 6796 // Shift narrow oop and or it into register that already contains the heap base.
// Base == dst must hold; this is assured by construction in postalloc_expand.
 6798 instruct decodeN_mergeDisjoint(iRegPdst dst, iRegNsrc src, iRegLsrc base) %{
 6799   match(Set dst (DecodeN src));
 6800   effect(TEMP base);
 6801   predicate(false);
 6802 
 6803   format %{ "RLDIMI  $dst, $src, shift, 32-shift \t// DecodeN (disjoint base)" %}
 6804   size(4);
 6805   ins_encode %{
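    // Insert the narrow oop, shifted left by CompressedOops::shift(), into
    // the register that already holds the disjoint heap base; the base bits
    // above the inserted field are preserved, so no add is needed.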
 6806     __ rldimi($dst$$Register, $src$$Register, CompressedOops::shift(), 32-CompressedOops::shift());
 6807   %}
 6808   ins_pipe(pipe_class_default);
 6809 %}
 6810 
 6811 // Optimize DecodeN for disjoint base.
 6812 // This node requires only one cycle on the critical path.
// We must postalloc_expand because we cannot express use_def effects where
// the used register is of type L and the def'ed register of type P.
 6815 instruct decodeN_Disjoint_notNull_Ex(iRegPdst dst, iRegNsrc src) %{
 6816   match(Set dst (DecodeN src));
 6817   effect(TEMP_DEF dst);
 6818   predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
 6819              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
 6820             CompressedOops::base_disjoint());
 6821   ins_cost(DEFAULT_COST);
 6822 
  format %{ "MOV     $dst, heapbase\n\t"
 6824             "RLDIMI  $dst, $src, shift, 32-shift \t// decode with disjoint base" %}
 6825   postalloc_expand %{
 6826     loadBaseNode *n1 = new loadBaseNode();
 6827     n1->add_req(NULL);
 6828     n1->_opnds[0] = op_dst;
 6829 
 6830     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
 6831     n2->add_req(n_region, n_src, n1);
 6832     n2->_opnds[0] = op_dst;
 6833     n2->_opnds[1] = op_src;
 6834     n2->_opnds[2] = op_dst;
 6835     n2->_bottom_type = _bottom_type;
 6836 
 6837     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 6838     ra_->set_oop(n2, true);
 6839 
 6840     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6841     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6842 
 6843     nodes->push(n1);
 6844     nodes->push(n2);
 6845   %}
 6846 %}
 6847 
 6848 instruct decodeN_Disjoint_isel_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{
 6849   match(Set dst (DecodeN src));
 6850   effect(TEMP_DEF dst, TEMP crx);
 6851   predicate((n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
 6852              n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant) &&
 6853             CompressedOops::base_disjoint() && VM_Version::has_isel());
 6854   ins_cost(3 * DEFAULT_COST);
 6855 
 6856   format %{ "DecodeN  $dst, $src \t// decode with disjoint base using isel" %}
 6857   postalloc_expand %{
 6858     loadBaseNode *n1 = new loadBaseNode();
 6859     n1->add_req(NULL);
 6860     n1->_opnds[0] = op_dst;
 6861 
 6862     cmpN_reg_imm0Node *n_compare  = new cmpN_reg_imm0Node();
 6863     n_compare->add_req(n_region, n_src);
 6864     n_compare->_opnds[0] = op_crx;
 6865     n_compare->_opnds[1] = op_src;
 6866     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
 6867 
 6868     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
 6869     n2->add_req(n_region, n_src, n1);
 6870     n2->_opnds[0] = op_dst;
 6871     n2->_opnds[1] = op_src;
 6872     n2->_opnds[2] = op_dst;
 6873     n2->_bottom_type = _bottom_type;
 6874 
 6875     cond_set_0_ptrNode *n_cond_set = new cond_set_0_ptrNode();
 6876     n_cond_set->add_req(n_region, n_compare, n2);
 6877     n_cond_set->_opnds[0] = op_dst;
 6878     n_cond_set->_opnds[1] = op_crx;
 6879     n_cond_set->_opnds[2] = op_dst;
 6880     n_cond_set->_bottom_type = _bottom_type;
 6881 
 6882     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
 6883     ra_->set_oop(n_cond_set, true);
 6884 
 6885     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6886     ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
 6887     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6888     ra_->set_pair(n_cond_set->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 6889 
 6890     nodes->push(n1);
 6891     nodes->push(n_compare);
 6892     nodes->push(n2);
 6893     nodes->push(n_cond_set);
 6894   %}
 6895 %}
 6896 
 6897 // src != 0, shift != 0, base != 0
 6898 instruct decodeN_notNull_addBase_Ex(iRegPdst dst, iRegNsrc src) %{
 6899   match(Set dst (DecodeN src));
 6900   predicate((n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
 6901              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
 6902             CompressedOops::shift() != 0 &&
 6903             CompressedOops::base() != 0);
 6904   ins_cost(2 * DEFAULT_COST);
 6905 
 6906   format %{ "DecodeN $dst, $src \t// $src != NULL, postalloc expanded" %}
 6907   postalloc_expand( postalloc_expand_decode_oop_not_null(dst, src));
 6908 %}
 6909 
 6910 // Compressed OOPs with narrow_oop_shift == 0.
 6911 instruct decodeN_unscaled(iRegPdst dst, iRegNsrc src) %{
 6912   match(Set dst (DecodeN src));
 6913   predicate(CompressedOops::shift() == 0);
 6914   ins_cost(DEFAULT_COST);
 6915 
 6916   format %{ "MR      $dst, $src \t// DecodeN (unscaled)" %}
 6917   // variable size, 0 or 4.
 6918   ins_encode %{
 6919     __ mr_if_needed($dst$$Register, $src$$Register);
 6920   %}
 6921   ins_pipe(pipe_class_default);
 6922 %}
 6923 
 6924 // Convert compressed oop into int for vectors alignment masking.
 6925 instruct decodeN2I_unscaled(iRegIdst dst, iRegNsrc src) %{
 6926   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6927   predicate(CompressedOops::shift() == 0);
 6928   ins_cost(DEFAULT_COST);
 6929 
 6930   format %{ "MR      $dst, $src \t// (int)DecodeN (unscaled)" %}
 6931   // variable size, 0 or 4.
 6932   ins_encode %{
 6933     __ mr_if_needed($dst$$Register, $src$$Register);
 6934   %}
 6935   ins_pipe(pipe_class_default);
 6936 %}
 6937 
 6938 // Convert klass pointer into compressed form.
 6939 
 6940 // Nodes for postalloc expand.
 6941 
 6942 // Shift node for expand.
 6943 instruct encodePKlass_shift(iRegNdst dst, iRegNsrc src) %{
 6944   // The match rule is needed to make it a 'MachTypeNode'!
 6945   match(Set dst (EncodePKlass src));
 6946   predicate(false);
 6947 
 6948   format %{ "SRDI    $dst, $src, 3 \t// encode" %}
 6949   size(4);
 6950   ins_encode %{
 6951     __ srdi($dst$$Register, $src$$Register, CompressedKlassPointers::shift());
 6952   %}
 6953   ins_pipe(pipe_class_default);
 6954 %}
 6955 
// Subtract-base node for expand.
 6957 instruct encodePKlass_sub_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
 6958   // The match rule is needed to make it a 'MachTypeNode'!
 6959   match(Set dst (EncodePKlass (Binary base src)));
 6960   predicate(false);
 6961 
 6962   format %{ "SUB     $dst, $base, $src \t// encode" %}
 6963   size(4);
 6964   ins_encode %{
 6965     __ subf($dst$$Register, $base$$Register, $src$$Register);
 6966   %}
 6967   ins_pipe(pipe_class_default);
 6968 %}
 6969 
// Disjoint narrow klass base.
 6971 instruct encodePKlass_Disjoint(iRegNdst dst, iRegPsrc src) %{
 6972   match(Set dst (EncodePKlass src));
 6973   predicate(false /* TODO: PPC port CompressedKlassPointers::base_disjoint()*/);
 6974 
 6975   format %{ "EXTRDI  $dst, $src, #32, #3 \t// encode with disjoint base" %}
 6976   size(4);
 6977   ins_encode %{
 6978     __ rldicl($dst$$Register, $src$$Register, 64-CompressedKlassPointers::shift(), 32);
 6979   %}
 6980   ins_pipe(pipe_class_default);
 6981 %}
 6982 
 6983 // shift != 0, base != 0
 6984 instruct encodePKlass_not_null_Ex(iRegNdst dst, iRegLsrc base, iRegPsrc src) %{
 6985   match(Set dst (EncodePKlass (Binary base src)));
 6986   predicate(false);
 6987 
 6988   format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
 6989   postalloc_expand %{
 6990     encodePKlass_sub_baseNode *n1 = new encodePKlass_sub_baseNode();
 6991     n1->add_req(n_region, n_base, n_src);
 6992     n1->_opnds[0] = op_dst;
 6993     n1->_opnds[1] = op_base;
 6994     n1->_opnds[2] = op_src;
 6995     n1->_bottom_type = _bottom_type;
 6996 
 6997     encodePKlass_shiftNode *n2 = new encodePKlass_shiftNode();
 6998     n2->add_req(n_region, n1);
 6999     n2->_opnds[0] = op_dst;
 7000     n2->_opnds[1] = op_dst;
 7001     n2->_bottom_type = _bottom_type;
 7002     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7003     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7004 
 7005     nodes->push(n1);
 7006     nodes->push(n2);
 7007   %}
 7008 %}
 7009 
 7010 // shift != 0, base != 0
 7011 instruct encodePKlass_not_null_ExEx(iRegNdst dst, iRegPsrc src) %{
 7012   match(Set dst (EncodePKlass src));
 7013   //predicate(CompressedKlassPointers::shift() != 0 &&
 7014   //          true /* TODO: PPC port CompressedKlassPointers::base_overlaps()*/);
 7015 
 7016   //format %{ "EncodePKlass $dst, $src\t// $src != Null, postalloc expanded" %}
 7017   ins_cost(DEFAULT_COST*2);  // Don't count constant.
 7018   expand %{
 7019     immL baseImm %{ (jlong)(intptr_t)CompressedKlassPointers::base() %}
 7020     iRegLdst base;
 7021     loadConL_Ex(base, baseImm);
 7022     encodePKlass_not_null_Ex(dst, base, src);
 7023   %}
 7024 %}
 7025 
 7026 // Decode nodes.
 7027 
 7028 // Shift node for expand.
 7029 instruct decodeNKlass_shift(iRegPdst dst, iRegPsrc src) %{
 7030   // The match rule is needed to make it a 'MachTypeNode'!
 7031   match(Set dst (DecodeNKlass src));
 7032   predicate(false);
 7033 
 7034   format %{ "SLDI    $dst, $src, #3 \t// DecodeNKlass" %}
 7035   size(4);
 7036   ins_encode %{
 7037     __ sldi($dst$$Register, $src$$Register, CompressedKlassPointers::shift());
 7038   %}
 7039   ins_pipe(pipe_class_default);
 7040 %}
 7041 
 7042 // Add node for expand.
 7043 
 7044 instruct decodeNKlass_add_base(iRegPdst dst, iRegLsrc base, iRegPdst src) %{
 7045   // The match rule is needed to make it a 'MachTypeNode'!
 7046   match(Set dst (DecodeNKlass (Binary base src)));
 7047   predicate(false);
 7048 
 7049   format %{ "ADD     $dst, $base, $src \t// DecodeNKlass, add klass base" %}
 7050   size(4);
 7051   ins_encode %{
 7052     __ add($dst$$Register, $base$$Register, $src$$Register);
 7053   %}
 7054   ins_pipe(pipe_class_default);
 7055 %}
 7056 
 7057 // src != 0, shift != 0, base != 0
 7058 instruct decodeNKlass_notNull_addBase_Ex(iRegPdst dst, iRegLsrc base, iRegNsrc src) %{
 7059   match(Set dst (DecodeNKlass (Binary base src)));
 7060   //effect(kill src); // We need a register for the immediate result after shifting.
 7061   predicate(false);
 7062 
 7063   format %{ "DecodeNKlass $dst =  $base + ($src << 3) \t// $src != NULL, postalloc expanded" %}
 7064   postalloc_expand %{
 7065     decodeNKlass_add_baseNode *n1 = new decodeNKlass_add_baseNode();
 7066     n1->add_req(n_region, n_base, n_src);
 7067     n1->_opnds[0] = op_dst;
 7068     n1->_opnds[1] = op_base;
 7069     n1->_opnds[2] = op_src;
 7070     n1->_bottom_type = _bottom_type;
 7071 
 7072     decodeNKlass_shiftNode *n2 = new decodeNKlass_shiftNode();
 7073     n2->add_req(n_region, n1);
 7074     n2->_opnds[0] = op_dst;
 7075     n2->_opnds[1] = op_dst;
 7076     n2->_bottom_type = _bottom_type;
 7077 
 7078     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7079     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
 7080 
 7081     nodes->push(n1);
 7082     nodes->push(n2);
 7083   %}
 7084 %}
 7085 
 7086 // src != 0, shift != 0, base != 0
 7087 instruct decodeNKlass_notNull_addBase_ExEx(iRegPdst dst, iRegNsrc src) %{
 7088   match(Set dst (DecodeNKlass src));
 7089   // predicate(CompressedKlassPointers::shift() != 0 &&
 7090   //           CompressedKlassPointers::base() != 0);
 7091 
 7092   //format %{ "DecodeNKlass $dst, $src \t// $src != NULL, expanded" %}
 7093 
 7094   ins_cost(DEFAULT_COST*2);  // Don't count constant.
 7095   expand %{
 7096     // We add first, then we shift. Like this, we can get along with one register less.
 7097     // But we have to load the base pre-shifted.
 7098     immL baseImm %{ (jlong)((intptr_t)CompressedKlassPointers::base() >> CompressedKlassPointers::shift()) %}
 7099     iRegLdst base;
 7100     loadConL_Ex(base, baseImm);
 7101     decodeNKlass_notNull_addBase_Ex(dst, base, src);
 7102   %}
 7103 %}
 7104 
 7105 //----------MemBar Instructions-----------------------------------------------
 7106 // Memory barrier flavors
 7107 
 7108 instruct membar_acquire() %{
 7109   match(LoadFence);
 7110   ins_cost(4*MEMORY_REF_COST);
 7111 
 7112   format %{ "MEMBAR-acquire" %}
 7113   size(4);
 7114   ins_encode %{
 7115     __ acquire();
 7116   %}
 7117   ins_pipe(pipe_class_default);
 7118 %}
 7119 
 7120 instruct unnecessary_membar_acquire() %{
 7121   match(MemBarAcquire);
 7122   ins_cost(0);
 7123 
 7124   format %{ " -- \t// redundant MEMBAR-acquire - empty" %}
 7125   size(0);
 7126   ins_encode( /*empty*/ );
 7127   ins_pipe(pipe_class_default);
 7128 %}
 7129 
 7130 instruct membar_acquire_lock() %{
 7131   match(MemBarAcquireLock);
 7132   ins_cost(0);
 7133 
 7134   format %{ " -- \t// redundant MEMBAR-acquire - empty (acquire as part of CAS in prior FastLock)" %}
 7135   size(0);
 7136   ins_encode( /*empty*/ );
 7137   ins_pipe(pipe_class_default);
 7138 %}
 7139 
 7140 instruct membar_release() %{
 7141   match(MemBarRelease);
 7142   match(StoreFence);
 7143   ins_cost(4*MEMORY_REF_COST);
 7144 
 7145   format %{ "MEMBAR-release" %}
 7146   size(4);
 7147   ins_encode %{
 7148     __ release();
 7149   %}
 7150   ins_pipe(pipe_class_default);
 7151 %}
 7152 
 7153 instruct membar_storestore() %{
 7154   match(MemBarStoreStore);
 7155   match(StoreStoreFence);
 7156   ins_cost(4*MEMORY_REF_COST);
 7157 
 7158   format %{ "MEMBAR-store-store" %}
 7159   size(4);
 7160   ins_encode %{
 7161     __ membar(Assembler::StoreStore);
 7162   %}
 7163   ins_pipe(pipe_class_default);
 7164 %}
 7165 
 7166 instruct membar_release_lock() %{
 7167   match(MemBarReleaseLock);
 7168   ins_cost(0);
 7169 
 7170   format %{ " -- \t// redundant MEMBAR-release - empty (release in FastUnlock)" %}
 7171   size(0);
 7172   ins_encode( /*empty*/ );
 7173   ins_pipe(pipe_class_default);
 7174 %}
 7175 
 7176 instruct membar_volatile() %{
 7177   match(MemBarVolatile);
 7178   ins_cost(4*MEMORY_REF_COST);
 7179 
 7180   format %{ "MEMBAR-volatile" %}
 7181   size(4);
 7182   ins_encode %{
 7183     __ fence();
 7184   %}
 7185   ins_pipe(pipe_class_default);
 7186 %}
 7187 
 7188 // This optimization is wrong on PPC. The following pattern is not supported:
 7189 //  MemBarVolatile
 7190 //   ^        ^
 7191 //   |        |
 7192 //  CtrlProj MemProj
 7193 //   ^        ^
 7194 //   |        |
 7195 //   |       Load
 7196 //   |
 7197 //  MemBarVolatile
 7198 //
 7199 //  The first MemBarVolatile could get optimized out! According to
//  Vladimir, this pattern cannot occur on Oracle platforms.
 7201 //  However, it does occur on PPC64 (because of membars in
 7202 //  inline_unsafe_load_store).
 7203 //
// Add this node again if we find a good solution for inline_unsafe_load_store().
// Don't forget to look at the implementation of post_store_load_barrier again;
// we made other fixes in that method.
 7207 //instruct unnecessary_membar_volatile() %{
 7208 //  match(MemBarVolatile);
 7209 //  predicate(Matcher::post_store_load_barrier(n));
 7210 //  ins_cost(0);
 7211 //
 7212 //  format %{ " -- \t// redundant MEMBAR-volatile - empty" %}
 7213 //  size(0);
 7214 //  ins_encode( /*empty*/ );
 7215 //  ins_pipe(pipe_class_default);
 7216 //%}
 7217 
 7218 instruct membar_CPUOrder() %{
 7219   match(MemBarCPUOrder);
 7220   ins_cost(0);
 7221 
 7222   format %{ " -- \t// MEMBAR-CPUOrder - empty: PPC64 processors are self-consistent." %}
 7223   size(0);
 7224   ins_encode( /*empty*/ );
 7225   ins_pipe(pipe_class_default);
 7226 %}
 7227 
 7228 //----------Conditional Move---------------------------------------------------
 7229 
 7230 // Cmove using isel.
 7231 instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
 7232   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7233   predicate(VM_Version::has_isel());
 7234   ins_cost(DEFAULT_COST);
 7235 
 7236   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7237   size(4);
 7238   ins_encode %{
 7239     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7241     int cc        = $cmp$$cmpcode;
 7242     __ isel($dst$$Register, $crx$$CondRegister,
 7243             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7244   %}
 7245   ins_pipe(pipe_class_default);
 7246 %}
 7247 
 7248 instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
 7249   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7250   predicate(!VM_Version::has_isel());
 7251   ins_cost(DEFAULT_COST+BRANCH_COST);
 7252 
 7253   ins_variable_size_depending_on_alignment(true);
 7254 
 7255   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7256   // Worst case is branch + move + stop, no stop without scheduler
 7257   size(8);
 7258   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7259   ins_pipe(pipe_class_default);
 7260 %}
 7261 
 7262 instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{
 7263   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
 7264   ins_cost(DEFAULT_COST+BRANCH_COST);
 7265 
 7266   ins_variable_size_depending_on_alignment(true);
 7267 
 7268   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7269   // Worst case is branch + move + stop, no stop without scheduler
 7270   size(8);
 7271   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7272   ins_pipe(pipe_class_default);
 7273 %}
 7274 
 7275 // Cmove using isel.
 7276 instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
 7277   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7278   predicate(VM_Version::has_isel());
 7279   ins_cost(DEFAULT_COST);
 7280 
 7281   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7282   size(4);
 7283   ins_encode %{
 7284     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7286     int cc        = $cmp$$cmpcode;
 7287     __ isel($dst$$Register, $crx$$CondRegister,
 7288             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7289   %}
 7290   ins_pipe(pipe_class_default);
 7291 %}
 7292 
 7293 instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
 7294   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7295   predicate(!VM_Version::has_isel());
 7296   ins_cost(DEFAULT_COST+BRANCH_COST);
 7297 
 7298   ins_variable_size_depending_on_alignment(true);
 7299 
 7300   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7301   // Worst case is branch + move + stop, no stop without scheduler.
 7302   size(8);
 7303   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7304   ins_pipe(pipe_class_default);
 7305 %}
 7306 
 7307 instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{
 7308   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
 7309   ins_cost(DEFAULT_COST+BRANCH_COST);
 7310 
 7311   ins_variable_size_depending_on_alignment(true);
 7312 
 7313   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7314   // Worst case is branch + move + stop, no stop without scheduler.
 7315   size(8);
 7316   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7317   ins_pipe(pipe_class_default);
 7318 %}
 7319 
 7320 // Cmove using isel.
 7321 instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
 7322   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7323   predicate(VM_Version::has_isel());
 7324   ins_cost(DEFAULT_COST);
 7325 
 7326   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7327   size(4);
 7328   ins_encode %{
 7329     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7331     int cc        = $cmp$$cmpcode;
 7332     __ isel($dst$$Register, $crx$$CondRegister,
 7333             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7334   %}
 7335   ins_pipe(pipe_class_default);
 7336 %}
 7337 
 7338 // Conditional move for RegN. Only cmov(reg, reg).
 7339 instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
 7340   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7341   predicate(!VM_Version::has_isel());
 7342   ins_cost(DEFAULT_COST+BRANCH_COST);
 7343 
 7344   ins_variable_size_depending_on_alignment(true);
 7345 
 7346   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7347   // Worst case is branch + move + stop, no stop without scheduler.
 7348   size(8);
 7349   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7350   ins_pipe(pipe_class_default);
 7351 %}
 7352 
 7353 instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{
 7354   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
 7355   ins_cost(DEFAULT_COST+BRANCH_COST);
 7356 
 7357   ins_variable_size_depending_on_alignment(true);
 7358 
 7359   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7360   // Worst case is branch + move + stop, no stop without scheduler.
 7361   size(8);
 7362   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7363   ins_pipe(pipe_class_default);
 7364 %}
 7365 
 7366 // Cmove using isel.
 7367 instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{
 7368   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7369   predicate(VM_Version::has_isel());
 7370   ins_cost(DEFAULT_COST);
 7371 
 7372   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7373   size(4);
 7374   ins_encode %{
 7375     // This is a Power7 instruction for which no machine description
    // exists. Anyway, the scheduler should be off on Power7.
 7377     int cc        = $cmp$$cmpcode;
 7378     __ isel($dst$$Register, $crx$$CondRegister,
 7379             (Assembler::Condition)(cc & 3), /*invert*/((~cc) & 8), $src$$Register);
 7380   %}
 7381   ins_pipe(pipe_class_default);
 7382 %}
 7383 
 7384 instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{
 7385   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7386   predicate(!VM_Version::has_isel());
 7387   ins_cost(DEFAULT_COST+BRANCH_COST);
 7388 
 7389   ins_variable_size_depending_on_alignment(true);
 7390 
 7391   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7392   // Worst case is branch + move + stop, no stop without scheduler.
 7393   size(8);
 7394   ins_encode( enc_cmove_reg(dst, crx, src, cmp) );
 7395   ins_pipe(pipe_class_default);
 7396 %}
 7397 
 7398 instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{
 7399   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
 7400   ins_cost(DEFAULT_COST+BRANCH_COST);
 7401 
 7402   ins_variable_size_depending_on_alignment(true);
 7403 
 7404   format %{ "CMOVE   $cmp, $crx, $dst, $src\n\t" %}
 7405   // Worst case is branch + move + stop, no stop without scheduler.
 7406   size(8);
 7407   ins_encode( enc_cmove_imm(dst, crx, src, cmp) );
 7408   ins_pipe(pipe_class_default);
 7409 %}
 7410 
 7411 instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{
 7412   match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
 7413   ins_cost(DEFAULT_COST+BRANCH_COST);
 7414 
 7415   ins_variable_size_depending_on_alignment(true);
 7416 
 7417   format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
 7418   // Worst case is branch + move + stop, no stop without scheduler.
 7419   size(8);
 7420   ins_encode %{
 7421     Label done;
 7422     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 7423     // Branch if not (cmp crx).
 7424     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 7425     __ fmr($dst$$FloatRegister, $src$$FloatRegister);
 7426     __ bind(done);
 7427   %}
 7428   ins_pipe(pipe_class_default);
 7429 %}
 7430 
 7431 instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
 7432   match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
 7433   ins_cost(DEFAULT_COST+BRANCH_COST);
 7434 
 7435   ins_variable_size_depending_on_alignment(true);
 7436 
 7437   format %{ "CMOVEF  $cmp, $crx, $dst, $src\n\t" %}
 7438   // Worst case is branch + move + stop, no stop without scheduler.
 7439   size(8);
 7440   ins_encode %{
 7441     Label done;
 7442     assert((Assembler::bcondCRbiIs1 & ~Assembler::bcondCRbiIs0) == 8, "check encoding");
 7443     // Branch if not (cmp crx).
 7444     __ bc(cc_to_inverse_boint($cmp$$cmpcode), cc_to_biint($cmp$$cmpcode, $crx$$reg), done);
 7445     __ fmr($dst$$FloatRegister, $src$$FloatRegister);
 7446     __ bind(done);
 7447   %}
 7448   ins_pipe(pipe_class_default);
 7449 %}
 7450 
 7451 //----------Conditional_store--------------------------------------------------
 7452 // Conditional-store of the updated heap-top.
 7453 // Used during allocation of the shared heap.
 7454 // Sets flags (EQ) on success. Implemented with a CASA on Sparc.
 7455 
// As compareAndSwapL, but returns the flag register instead of a boolean
// value in an int register.
// Used by sun/misc/AtomicLongCSImpl.java.
// Mem_ptr must be a memory operand, otherwise this node does not get
// Flag_needs_anti_dependence_check set by adlc. If this flag is not set,
// the node can be rematerialized, which leads to errors.
 7462 instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal, flagsRegCR0 cr0) %{
 7463   match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
 7464   effect(TEMP cr0);
 7465   format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
 7466   ins_encode %{
 7467     __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
 7468                 MacroAssembler::MemBarAcq, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7469                 noreg, NULL, true);
 7470   %}
 7471   ins_pipe(pipe_class_default);
 7472 %}
 7473 
// As compareAndSwapP, but returns the flag register instead of a boolean
// value in an int register.
// This instruction is matched if UseTLAB is off.
// Mem_ptr must be a memory operand, otherwise this node does not get
// Flag_needs_anti_dependence_check set by adlc. If this flag is not set,
// the node can be rematerialized, which leads to errors.
 7480 instruct storePConditional_regP_regP_regP(flagsRegCR0 cr0, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
 7481   match(Set cr0 (StorePConditional mem_ptr (Binary oldVal newVal)));
 7482   ins_cost(2*MEMORY_REF_COST);
 7483   predicate(n->as_LoadStore()->barrier_data() == 0);
 7484 
 7485   format %{ "STDCX_  if ($cr0 = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
 7486   ins_encode %{
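    // stdcx. stores only if the reservation from the matching load-and-reserve
    // (see loadPLocked) still holds, and records success in CR0.EQ.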
 7487     __ stdcx_($newVal$$Register, $mem_ptr$$Register);
 7488   %}
 7489   ins_pipe(pipe_class_memory);
 7490 %}
 7491 
 7492 // Implement LoadPLocked. Must be ordered against changes of the memory location
 7493 // by storePConditional.
 7494 // Don't know whether this is ever used.
 7495 instruct loadPLocked(iRegPdst dst, memory mem) %{
 7496   match(Set dst (LoadPLocked mem));
 7497   ins_cost(2*MEMORY_REF_COST);
 7498 
 7499   format %{ "LDARX   $dst, $mem \t// loadPLocked\n\t" %}
 7500   size(4);
 7501   ins_encode %{
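    // ldarx loads the pointer and establishes the reservation that the
    // stdcx. in storePConditional checks.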
 7502     __ ldarx($dst$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 7503   %}
 7504   ins_pipe(pipe_class_memory);
 7505 %}
 7506 
 7507 //----------Compare-And-Swap---------------------------------------------------
 7508 
// CompareAndSwap{P,I,L} have more than one output; therefore "CmpI
// (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ...))" cannot be
// matched.
 7512 
 7513 // Strong versions:
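// Note on the shape of the strong versions below (a descriptive sketch, not a
// normative specification): the cmpxchg* macros implement the LL/SC retry loop,
// set CCR0 and write the boolean result into $res. A trailing isync or sync is
// then emitted, depending on support_IRIW_for_not_multiple_copy_atomic_cpu, to
// provide the acquire/fence ordering expected from a strong CAS. The "*4" variants
// are selected when has_lqarx() is false; they emulate the sub-word (byte/halfword)
// cases, which is why they need additional temps and fixed argument registers.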
 7514 
 7515 instruct compareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7516   match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
 7517   predicate(VM_Version::has_lqarx());
 7518   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7519   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7520   ins_encode %{
 7521     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7522     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7523                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7524                 $res$$Register, true);
 7525     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7526       __ isync();
 7527     } else {
 7528       __ sync();
 7529     }
 7530   %}
 7531   ins_pipe(pipe_class_default);
 7532 %}
 7533 
 7534 instruct compareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7535   match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
 7536   predicate(!VM_Version::has_lqarx());
 7537   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7538   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7539   ins_encode %{
 7540     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7541     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7542                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7543                 $res$$Register, true);
 7544     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7545       __ isync();
 7546     } else {
 7547       __ sync();
 7548     }
 7549   %}
 7550   ins_pipe(pipe_class_default);
 7551 %}
 7552 
 7553 instruct compareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7554   match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
 7555   predicate(VM_Version::has_lqarx());
 7556   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7557   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7558   ins_encode %{
 7559     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7560     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7561                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7562                 $res$$Register, true);
 7563     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7564       __ isync();
 7565     } else {
 7566       __ sync();
 7567     }
 7568   %}
 7569   ins_pipe(pipe_class_default);
 7570 %}
 7571 
 7572 instruct compareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7573   match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
 7574   predicate(!VM_Version::has_lqarx());
 7575   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7576   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7577   ins_encode %{
 7578     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7579     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7580                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7581                 $res$$Register, true);
 7582     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7583       __ isync();
 7584     } else {
 7585       __ sync();
 7586     }
 7587   %}
 7588   ins_pipe(pipe_class_default);
 7589 %}
 7590 
 7591 instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7592   match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
 7593   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7594   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7595   ins_encode %{
 7596     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7597     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7598                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7599                 $res$$Register, true);
 7600     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7601       __ isync();
 7602     } else {
 7603       __ sync();
 7604     }
 7605   %}
 7606   ins_pipe(pipe_class_default);
 7607 %}
 7608 
 7609 instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7610   match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
 7611   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7612   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7613   ins_encode %{
 7614     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7615     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7616                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7617                 $res$$Register, true);
 7618     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7619       __ isync();
 7620     } else {
 7621       __ sync();
 7622     }
 7623   %}
 7624   ins_pipe(pipe_class_default);
 7625 %}
 7626 
 7627 instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7628   match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
 7629   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7630   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
 7631   ins_encode %{
 7632     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7633     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7634                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7635                 $res$$Register, NULL, true);
 7636     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7637       __ isync();
 7638     } else {
 7639       __ sync();
 7640     }
 7641   %}
 7642   ins_pipe(pipe_class_default);
 7643 %}
 7644 
 7645 instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7646   match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
 7647   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7648   predicate(n->as_LoadStore()->barrier_data() == 0);
 7649   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7650   ins_encode %{
 7651     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7652     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7653                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7654                 $res$$Register, NULL, true);
 7655     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7656       __ isync();
 7657     } else {
 7658       __ sync();
 7659     }
 7660   %}
 7661   ins_pipe(pipe_class_default);
 7662 %}
 7663 
 7664 // Weak versions:
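// Weak versions may report failure spuriously in rare cases, so callers must retry.
// In exchange, the relaxed variants below omit the trailing barrier entirely, and
// the "_acq" variants fold the acquire semantics into the cmpxchg call via
// MemBarAcq/MemBarFenceAfter instead of emitting a separate isync/sync.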
 7665 
 7666 instruct weakCompareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7667   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7668   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7669   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7670   format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7671   ins_encode %{
 7672     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7673     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7674                 MacroAssembler::MemBarNone,
 7675                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7676   %}
 7677   ins_pipe(pipe_class_default);
 7678 %}
 7679 
 7680 instruct weakCompareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7681   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7682   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7683   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7684   format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
 7685   ins_encode %{
 7686     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7687     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7688                 MacroAssembler::MemBarNone,
 7689                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7690   %}
 7691   ins_pipe(pipe_class_default);
 7692 %}
 7693 
 7694 instruct weakCompareAndSwapB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7695   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7696   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7697   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7698   format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7699   ins_encode %{
 7700     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7701     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7702                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7703                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7704   %}
 7705   ins_pipe(pipe_class_default);
 7706 %}
 7707 
 7708 instruct weakCompareAndSwapB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7709   match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
 7710   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7711   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7712   format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7713   ins_encode %{
 7714     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7715     __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7716                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7717                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7718   %}
 7719   ins_pipe(pipe_class_default);
 7720 %}
 7721 
 7722 instruct weakCompareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7723   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7724   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7725   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7726   format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7727   ins_encode %{
 7728     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7729     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7730                 MacroAssembler::MemBarNone,
 7731                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7732   %}
 7733   ins_pipe(pipe_class_default);
 7734 %}
 7735 
 7736 instruct weakCompareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7737   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7738   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7739   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7740   format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
 7741   ins_encode %{
 7742     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7743     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7744                 MacroAssembler::MemBarNone,
 7745                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7746   %}
 7747   ins_pipe(pipe_class_default);
 7748 %}
 7749 
 7750 instruct weakCompareAndSwapS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7751   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7752   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7753   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7754   format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7755   ins_encode %{
 7756     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7757     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7758                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7759                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7760   %}
 7761   ins_pipe(pipe_class_default);
 7762 %}
 7763 
 7764 instruct weakCompareAndSwapS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
 7765   match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
 7766   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7767   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
 7768   format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7769   ins_encode %{
 7770     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7771     __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
 7772                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7773                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7774   %}
 7775   ins_pipe(pipe_class_default);
 7776 %}
 7777 
 7778 instruct weakCompareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7779   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
 7780   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7781   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7782   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7783   ins_encode %{
 7784     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7785     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7786                 MacroAssembler::MemBarNone,
 7787                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7788   %}
 7789   ins_pipe(pipe_class_default);
 7790 %}
 7791 
 7792 instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7793   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
 7794   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7795   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7796   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7797   ins_encode %{
 7798     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    // Acquire is only needed in the successful case. A weak node is allowed to report failure in
    // additional rare cases, and the value is never passed to the caller.
 7801     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7802                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7803                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7804   %}
 7805   ins_pipe(pipe_class_default);
 7806 %}
 7807 
 7808 instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7809   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
 7810   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7811   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7812   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
 7813   ins_encode %{
 7814     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7815     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7816                 MacroAssembler::MemBarNone,
 7817                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7818   %}
 7819   ins_pipe(pipe_class_default);
 7820 %}
 7821 
 7822 instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 7823   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
 7824   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7825   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7826   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7827   ins_encode %{
 7828     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    // Acquire is only needed in the successful case. A weak node is allowed to report failure in
    // additional rare cases, and the value is never passed to the caller.
 7831     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7832                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7833                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
 7834   %}
 7835   ins_pipe(pipe_class_default);
 7836 %}
 7837 
 7838 instruct weakCompareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7839   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
 7840   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 7841   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7842   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
 7843   ins_encode %{
 7844     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7846     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7847                 MacroAssembler::MemBarNone,
 7848                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7849   %}
 7850   ins_pipe(pipe_class_default);
 7851 %}
 7852 
 7853 instruct weakCompareAndSwapL_acq_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 7854   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
 7855   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 7856   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7857   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool" %}
 7858   ins_encode %{
 7859     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    // Acquire is only needed in the successful case. A weak node is allowed to report failure in
    // additional rare cases, and the value is never passed to the caller.
 7862     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7863                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7864                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7865   %}
 7866   ins_pipe(pipe_class_default);
 7867 %}
 7868 
 7869 instruct weakCompareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7870   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
 7871   predicate((((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
 7872   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7873   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7874   ins_encode %{
 7875     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7876     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7877                 MacroAssembler::MemBarNone,
 7878                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7879   %}
 7880   ins_pipe(pipe_class_default);
 7881 %}
 7882 
 7883 instruct weakCompareAndSwapP_acq_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 7884   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
 7885   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0);
 7886   effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
 7887   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
 7888   ins_encode %{
 7889     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
    // Acquire is only needed in the successful case. A weak node is allowed to report failure in
    // additional rare cases, and the value is never passed to the caller.
 7892     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 7893                 support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
 7894                 MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
 7895   %}
 7896   ins_pipe(pipe_class_default);
 7897 %}
 7898 
 7899 // CompareAndExchange
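// CompareAndExchange nodes return the value found in memory instead of a boolean.
// Note how the calls below pass $res where the CompareAndSwap rules above pass R0
// (so the fetched value lands in the result register) and pass noreg where the CAS
// rules pass $res (no boolean result is produced).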
 7900 
 7901 instruct compareAndExchangeB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7902   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7903   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7904   effect(TEMP_DEF res, TEMP cr0);
 7905   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
 7906   ins_encode %{
 7907     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7908     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7909                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7910                 noreg, true);
 7911   %}
 7912   ins_pipe(pipe_class_default);
 7913 %}
 7914 
 7915 instruct compareAndExchangeB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7916   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7917   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7918   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7919   format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
 7920   ins_encode %{
 7921     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7922     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7923                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7924                 noreg, true);
 7925   %}
 7926   ins_pipe(pipe_class_default);
 7927 %}
 7928 
 7929 instruct compareAndExchangeB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7930   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7931   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 7932   effect(TEMP_DEF res, TEMP cr0);
 7933   format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
 7934   ins_encode %{
 7935     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7936     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7937                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7938                 noreg, true);
 7939     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7940       __ isync();
 7941     } else {
 7942       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7943       __ sync();
 7944     }
 7945   %}
 7946   ins_pipe(pipe_class_default);
 7947 %}
 7948 
 7949 instruct compareAndExchangeB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7950   match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
 7951   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 7952   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7953   format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
 7954   ins_encode %{
 7955     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7956     __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7957                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7958                 noreg, true);
 7959     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 7960       __ isync();
 7961     } else {
 7962       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 7963       __ sync();
 7964     }
 7965   %}
 7966   ins_pipe(pipe_class_default);
 7967 %}
 7968 
 7969 instruct compareAndExchangeS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7970   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7971   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx());
 7972   effect(TEMP_DEF res, TEMP cr0);
 7973   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
 7974   ins_encode %{
 7975     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7976     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 7977                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7978                 noreg, true);
 7979   %}
 7980   ins_pipe(pipe_class_default);
 7981 %}
 7982 
 7983 instruct compareAndExchangeS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 7984   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7985   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx());
 7986   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 7987   format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
 7988   ins_encode %{
 7989     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 7990     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 7991                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 7992                 noreg, true);
 7993   %}
 7994   ins_pipe(pipe_class_default);
 7995 %}
 7996 
 7997 instruct compareAndExchangeS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 7998   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 7999   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx());
 8000   effect(TEMP_DEF res, TEMP cr0);
 8001   format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
 8002   ins_encode %{
 8003     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8004     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
 8005                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8006                 noreg, true);
 8007     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8008       __ isync();
 8009     } else {
 8010       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8011       __ sync();
 8012     }
 8013   %}
 8014   ins_pipe(pipe_class_default);
 8015 %}
 8016 
 8017 instruct compareAndExchangeS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
 8018   match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
 8019   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx());
 8020   effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0);
 8021   format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
 8022   ins_encode %{
 8023     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8024     __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
 8025                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8026                 noreg, true);
 8027     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8028       __ isync();
 8029     } else {
 8030       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8031       __ sync();
 8032     }
 8033   %}
 8034   ins_pipe(pipe_class_default);
 8035 %}
 8036 
 8037 instruct compareAndExchangeI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 8038   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
 8039   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8040   effect(TEMP_DEF res, TEMP cr0);
 8041   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as int" %}
 8042   ins_encode %{
 8043     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8044     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8045                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8046                 noreg, true);
 8047   %}
 8048   ins_pipe(pipe_class_default);
 8049 %}
 8050 
 8051 instruct compareAndExchangeI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
 8052   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
 8053   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8054   effect(TEMP_DEF res, TEMP cr0);
 8055   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as int" %}
 8056   ins_encode %{
 8057     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8058     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8059                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8060                 noreg, true);
 8061     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8062       __ isync();
 8063     } else {
 8064       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8065       __ sync();
 8066     }
 8067   %}
 8068   ins_pipe(pipe_class_default);
 8069 %}
 8070 
 8071 instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 8072   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
 8073   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8074   effect(TEMP_DEF res, TEMP cr0);
 8075   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
 8076   ins_encode %{
 8077     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8078     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8079                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8080                 noreg, true);
 8081   %}
 8082   ins_pipe(pipe_class_default);
 8083 %}
 8084 
 8085 instruct compareAndExchangeN_acq_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
 8086   match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
 8087   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8088   effect(TEMP_DEF res, TEMP cr0);
 8089   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
 8090   ins_encode %{
 8091     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8092     __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8093                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8094                 noreg, true);
 8095     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8096       __ isync();
 8097     } else {
 8098       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8099       __ sync();
 8100     }
 8101   %}
 8102   ins_pipe(pipe_class_default);
 8103 %}
 8104 
 8105 instruct compareAndExchangeL_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 8106   match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
 8107   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
 8108   effect(TEMP_DEF res, TEMP cr0);
 8109   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as long" %}
 8110   ins_encode %{
 8111     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8112     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8113                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8114                 noreg, NULL, true);
 8115   %}
 8116   ins_pipe(pipe_class_default);
 8117 %}
 8118 
 8119 instruct compareAndExchangeL_acq_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
 8120   match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
 8121   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
 8122   effect(TEMP_DEF res, TEMP cr0);
 8123   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as long" %}
 8124   ins_encode %{
 8125     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8126     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8127                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8128                 noreg, NULL, true);
 8129     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8130       __ isync();
 8131     } else {
 8132       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8133       __ sync();
 8134     }
 8135   %}
 8136   ins_pipe(pipe_class_default);
 8137 %}
 8138 
 8139 instruct compareAndExchangeP_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 8140   match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
 8141   predicate((((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)
 8142             && n->as_LoadStore()->barrier_data() == 0);
 8143   effect(TEMP_DEF res, TEMP cr0);
 8144   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
 8145   ins_encode %{
 8146     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8147     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8148                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8149                 noreg, NULL, true);
 8150   %}
 8151   ins_pipe(pipe_class_default);
 8152 %}
 8153 
 8154 instruct compareAndExchangeP_acq_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
 8155   match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
 8156   predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)
 8157             && n->as_LoadStore()->barrier_data() == 0);
 8158   effect(TEMP_DEF res, TEMP cr0);
 8159   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
 8160   ins_encode %{
 8161     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
 8162     __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
 8163                 MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
 8164                 noreg, NULL, true);
 8165     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8166       __ isync();
 8167     } else {
 8168       // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
 8169       __ sync();
 8170     }
 8171   %}
 8172   ins_pipe(pipe_class_default);
 8173 %}
 8174 
 8175 // Special RMW
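// Atomic get-and-add and get-and-set. The getandadd*/getandset* macros return the
// previous memory value in $res; the trailing isync/sync follows the same acquire
// pattern as the strong compare-and-swap rules above. As with CAS, the "*4"
// variants are used when has_lqarx() is false and need extra temp registers.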
 8176 
 8177 instruct getAndAddB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8178   match(Set res (GetAndAddB mem_ptr src));
 8179   predicate(VM_Version::has_lqarx());
 8180   effect(TEMP_DEF res, TEMP cr0);
 8181   format %{ "GetAndAddB $res, $mem_ptr, $src" %}
 8182   ins_encode %{
 8183     __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8184                   R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8185     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8186       __ isync();
 8187     } else {
 8188       __ sync();
 8189     }
 8190   %}
 8191   ins_pipe(pipe_class_default);
 8192 %}
 8193 
 8194 instruct getAndAddB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8195   match(Set res (GetAndAddB mem_ptr src));
 8196   predicate(!VM_Version::has_lqarx());
 8197   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8198   format %{ "GetAndAddB $res, $mem_ptr, $src" %}
 8199   ins_encode %{
 8200     __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8201                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8202     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8203       __ isync();
 8204     } else {
 8205       __ sync();
 8206     }
 8207   %}
 8208   ins_pipe(pipe_class_default);
 8209 %}
 8210 
 8211 instruct getAndAddS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8212   match(Set res (GetAndAddS mem_ptr src));
 8213   predicate(VM_Version::has_lqarx());
 8214   effect(TEMP_DEF res, TEMP cr0);
 8215   format %{ "GetAndAddS $res, $mem_ptr, $src" %}
 8216   ins_encode %{
 8217     __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
 8218                   R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8219     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8220       __ isync();
 8221     } else {
 8222       __ sync();
 8223     }
 8224   %}
 8225   ins_pipe(pipe_class_default);
 8226 %}
 8227 
 8228 instruct getAndAddS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8229   match(Set res (GetAndAddS mem_ptr src));
 8230   predicate(!VM_Version::has_lqarx());
 8231   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8232   format %{ "GetAndAddS $res, $mem_ptr, $src" %}
 8233   ins_encode %{
 8234     __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
 8235                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8236     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8237       __ isync();
 8238     } else {
 8239       __ sync();
 8240     }
 8241   %}
 8242   ins_pipe(pipe_class_default);
 8243 %}
 8244 
 8245 instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8246   match(Set res (GetAndAddI mem_ptr src));
 8247   effect(TEMP_DEF res, TEMP cr0);
 8248   format %{ "GetAndAddI $res, $mem_ptr, $src" %}
 8249   ins_encode %{
 8250     __ getandaddw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8251                   R0, MacroAssembler::cmpxchgx_hint_atomic_update());
 8252     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8253       __ isync();
 8254     } else {
 8255       __ sync();
 8256     }
 8257   %}
 8258   ins_pipe(pipe_class_default);
 8259 %}
 8260 
 8261 instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
 8262   match(Set res (GetAndAddL mem_ptr src));
 8263   effect(TEMP_DEF res, TEMP cr0);
 8264   format %{ "GetAndAddL $res, $mem_ptr, $src" %}
 8265   ins_encode %{
 8266     __ getandaddd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8267                   R0, MacroAssembler::cmpxchgx_hint_atomic_update());
 8268     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8269       __ isync();
 8270     } else {
 8271       __ sync();
 8272     }
 8273   %}
 8274   ins_pipe(pipe_class_default);
 8275 %}
 8276 
 8277 instruct getAndSetB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8278   match(Set res (GetAndSetB mem_ptr src));
 8279   predicate(VM_Version::has_lqarx());
 8280   effect(TEMP_DEF res, TEMP cr0);
 8281   format %{ "GetAndSetB $res, $mem_ptr, $src" %}
 8282   ins_encode %{
 8283     __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8284                   noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8285     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8286       __ isync();
 8287     } else {
 8288       __ sync();
 8289     }
 8290   %}
 8291   ins_pipe(pipe_class_default);
 8292 %}
 8293 
 8294 instruct getAndSetB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8295   match(Set res (GetAndSetB mem_ptr src));
 8296   predicate(!VM_Version::has_lqarx());
 8297   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8298   format %{ "GetAndSetB $res, $mem_ptr, $src" %}
 8299   ins_encode %{
 8300     __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
 8301                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8302     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8303       __ isync();
 8304     } else {
 8305       __ sync();
 8306     }
 8307   %}
 8308   ins_pipe(pipe_class_default);
 8309 %}
 8310 
 8311 instruct getAndSetS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8312   match(Set res (GetAndSetS mem_ptr src));
 8313   predicate(VM_Version::has_lqarx());
 8314   effect(TEMP_DEF res, TEMP cr0);
 8315   format %{ "GetAndSetS $res, $mem_ptr, $src" %}
 8316   ins_encode %{
 8317     __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
 8318                   noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update());
 8319     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8320       __ isync();
 8321     } else {
 8322       __ sync();
 8323     }
 8324   %}
 8325   ins_pipe(pipe_class_default);
 8326 %}
 8327 
 8328 instruct getAndSetS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIsrc tmp1, iRegIsrc tmp2, flagsRegCR0 cr0) %{
 8329   match(Set res (GetAndSetS mem_ptr src));
 8330   predicate(!VM_Version::has_lqarx());
 8331   effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0);
 8332   format %{ "GetAndSetS $res, $mem_ptr, $src" %}
 8333   ins_encode %{
 8334     __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
 8335                   R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
 8336     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8337       __ isync();
 8338     } else {
 8339       __ sync();
 8340     }
 8341   %}
 8342   ins_pipe(pipe_class_default);
 8343 %}
 8344 
 8345 instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
 8346   match(Set res (GetAndSetI mem_ptr src));
 8347   effect(TEMP_DEF res, TEMP cr0);
 8348   format %{ "GetAndSetI $res, $mem_ptr, $src" %}
 8349   ins_encode %{
 8350     __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8351                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8352     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8353       __ isync();
 8354     } else {
 8355       __ sync();
 8356     }
 8357   %}
 8358   ins_pipe(pipe_class_default);
 8359 %}
 8360 
 8361 instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
 8362   match(Set res (GetAndSetL mem_ptr src));
 8363   effect(TEMP_DEF res, TEMP cr0);
 8364   format %{ "GetAndSetL $res, $mem_ptr, $src" %}
 8365   ins_encode %{
 8366     __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8367                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8368     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8369       __ isync();
 8370     } else {
 8371       __ sync();
 8372     }
 8373   %}
 8374   ins_pipe(pipe_class_default);
 8375 %}
 8376 
 8377 instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{
 8378   match(Set res (GetAndSetP mem_ptr src));
 8379   predicate(n->as_LoadStore()->barrier_data() == 0);
 8380   effect(TEMP_DEF res, TEMP cr0);
 8381   format %{ "GetAndSetP $res, $mem_ptr, $src" %}
 8382   ins_encode %{
 8383     __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
 8384                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8385     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8386       __ isync();
 8387     } else {
 8388       __ sync();
 8389     }
 8390   %}
 8391   ins_pipe(pipe_class_default);
 8392 %}
 8393 
 8394 instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
 8395   match(Set res (GetAndSetN mem_ptr src));
 8396   effect(TEMP_DEF res, TEMP cr0);
 8397   format %{ "GetAndSetN $res, $mem_ptr, $src" %}
 8398   ins_encode %{
 8399     __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
 8400                   MacroAssembler::cmpxchgx_hint_atomic_update());
 8401     if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
 8402       __ isync();
 8403     } else {
 8404       __ sync();
 8405     }
 8406   %}
 8407   ins_pipe(pipe_class_default);
 8408 %}
 8409 
 8410 //----------Arithmetic Instructions--------------------------------------------
 8411 // Addition Instructions
 8412 
 8413 // Register Addition
 8414 instruct addI_reg_reg(iRegIdst dst, iRegIsrc_iRegL2Isrc src1, iRegIsrc_iRegL2Isrc src2) %{
 8415   match(Set dst (AddI src1 src2));
 8416   format %{ "ADD     $dst, $src1, $src2" %}
 8417   size(4);
 8418   ins_encode %{
 8419     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8420   %}
 8421   ins_pipe(pipe_class_default);
 8422 %}
 8423 
// Expand does not work with the above instruct. (??)
 8425 instruct addI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8426   // no match-rule
 8427   effect(DEF dst, USE src1, USE src2);
 8428   format %{ "ADD     $dst, $src1, $src2" %}
 8429   size(4);
 8430   ins_encode %{
 8431     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8432   %}
 8433   ins_pipe(pipe_class_default);
 8434 %}
 8435 
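// The following rule rebalances the left-leaning chain ((src1 + src2) + src3) + src4
// into (src1 + src2) + (src3 + src4) via the expand below, so the two inner
// additions are independent and can issue in parallel.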
 8436 instruct tree_addI_addI_addI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 8437   match(Set dst (AddI (AddI (AddI src1 src2) src3) src4));
 8438   ins_cost(DEFAULT_COST*3);
 8439 
 8440   expand %{
 8441     // FIXME: we should do this in the ideal world.
 8442     iRegIdst tmp1;
 8443     iRegIdst tmp2;
 8444     addI_reg_reg(tmp1, src1, src2);
 8445     addI_reg_reg_2(tmp2, src3, src4); // Adlc complains about addI_reg_reg.
 8446     addI_reg_reg(dst, tmp1, tmp2);
 8447   %}
 8448 %}
 8449 
 8450 // Immediate Addition
 8451 instruct addI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 8452   match(Set dst (AddI src1 src2));
 8453   format %{ "ADDI    $dst, $src1, $src2" %}
 8454   size(4);
 8455   ins_encode %{
 8456     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8457   %}
 8458   ins_pipe(pipe_class_default);
 8459 %}
 8460 
 8461 // Immediate Addition with 16-bit shifted operand
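// (The immIhi16 operand presumably only accepts constants whose low 16 bits are
// zero, e.g. 0x12340000: the encoding drops them via the >>16, and ADDIS shifts
// the immediate back up by 16 bits in hardware.)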
 8462 instruct addI_reg_immhi16(iRegIdst dst, iRegIsrc src1, immIhi16 src2) %{
 8463   match(Set dst (AddI src1 src2));
 8464   format %{ "ADDIS   $dst, $src1, $src2" %}
 8465   size(4);
 8466   ins_encode %{
 8467     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8468   %}
 8469   ins_pipe(pipe_class_default);
 8470 %}
 8471 
 8472 // Immediate Addition using prefixed addi
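// PADDI is a two-word prefixed instruction (Power10 / ISA 3.1) that takes the full
// 32-bit immediate directly. Prefixed instructions must not cross a 64-byte
// instruction address boundary, which appears to be what the alignment assert (and
// ins_alignment(2)) below guard against; the same applies to the PADDI rules for
// long and pointer addition further down.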
 8473 instruct addI_reg_imm32(iRegIdst dst, iRegIsrc src1, immI32 src2) %{
 8474   match(Set dst (AddI src1 src2));
 8475   predicate(PowerArchitecturePPC64 >= 10);
 8476   ins_cost(DEFAULT_COST+1);
 8477   format %{ "PADDI   $dst, $src1, $src2" %}
 8478   size(8);
 8479   ins_encode %{
 8480     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8481     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8482   %}
 8483   ins_pipe(pipe_class_default);
 8484   ins_alignment(2);
 8485 %}
 8486 
 8487 // Long Addition
 8488 instruct addL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8489   match(Set dst (AddL src1 src2));
 8490   format %{ "ADD     $dst, $src1, $src2 \t// long" %}
 8491   size(4);
 8492   ins_encode %{
 8493     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8494   %}
 8495   ins_pipe(pipe_class_default);
 8496 %}
 8497 
// Expand does not work with the above instruct. (??)
 8499 instruct addL_reg_reg_2(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8500   // no match-rule
 8501   effect(DEF dst, USE src1, USE src2);
 8502   format %{ "ADD     $dst, $src1, $src2 \t// long" %}
 8503   size(4);
 8504   ins_encode %{
 8505     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8506   %}
 8507   ins_pipe(pipe_class_default);
 8508 %}
 8509 
 8510 instruct tree_addL_addL_addL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, iRegLsrc src3, iRegLsrc src4) %{
 8511   match(Set dst (AddL (AddL (AddL src1 src2) src3) src4));
 8512   ins_cost(DEFAULT_COST*3);
 8513 
 8514   expand %{
 8515     // FIXME: we should do this in the ideal world.
 8516     iRegLdst tmp1;
 8517     iRegLdst tmp2;
 8518     addL_reg_reg(tmp1, src1, src2);
    addL_reg_reg_2(tmp2, src3, src4); // Adlc complains about addL_reg_reg.
 8520     addL_reg_reg(dst, tmp1, tmp2);
 8521   %}
 8522 %}
 8523 
 8524 // AddL + ConvL2I.
 8525 instruct addI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8526   match(Set dst (ConvL2I (AddL src1 src2)));
 8527 
 8528   format %{ "ADD     $dst, $src1, $src2 \t// long + l2i" %}
 8529   size(4);
 8530   ins_encode %{
 8531     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8532   %}
 8533   ins_pipe(pipe_class_default);
 8534 %}
 8535 
 8536 // No constant pool entries required.
 8537 instruct addL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 8538   match(Set dst (AddL src1 src2));
 8539 
 8540   format %{ "ADDI    $dst, $src1, $src2" %}
 8541   size(4);
 8542   ins_encode %{
 8543     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8544   %}
 8545   ins_pipe(pipe_class_default);
 8546 %}
 8547 
 8548 // Long Immediate Addition with 16-bit shifted operand.
 8549 // No constant pool entries required.
 8550 instruct addL_reg_immhi16(iRegLdst dst, iRegLsrc src1, immL32hi16 src2) %{
 8551   match(Set dst (AddL src1 src2));
 8552 
 8553   format %{ "ADDIS   $dst, $src1, $src2" %}
 8554   size(4);
 8555   ins_encode %{
 8556     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8557   %}
 8558   ins_pipe(pipe_class_default);
 8559 %}
 8560 
 8561 // Long Immediate Addition using prefixed addi
 8562 // No constant pool entries required.
 8563 instruct addL_reg_imm34(iRegLdst dst, iRegLsrc src1, immL34 src2) %{
 8564   match(Set dst (AddL src1 src2));
 8565   predicate(PowerArchitecturePPC64 >= 10);
 8566   ins_cost(DEFAULT_COST+1);
 8567 
 8568   format %{ "PADDI   $dst, $src1, $src2" %}
 8569   size(8);
 8570   ins_encode %{
 8571     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8572     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8573   %}
 8574   ins_pipe(pipe_class_default);
 8575   ins_alignment(2);
 8576 %}
 8577 
 8578 // Pointer Register Addition
 8579 instruct addP_reg_reg(iRegPdst dst, iRegP_N2P src1, iRegLsrc src2) %{
 8580   match(Set dst (AddP src1 src2));
 8581   format %{ "ADD     $dst, $src1, $src2" %}
 8582   size(4);
 8583   ins_encode %{
 8584     __ add($dst$$Register, $src1$$Register, $src2$$Register);
 8585   %}
 8586   ins_pipe(pipe_class_default);
 8587 %}
 8588 
 8589 // Pointer Immediate Addition
 8590 // No constant pool entries required.
 8591 instruct addP_reg_imm16(iRegPdst dst, iRegP_N2P src1, immL16 src2) %{
 8592   match(Set dst (AddP src1 src2));
 8593 
 8594   format %{ "ADDI    $dst, $src1, $src2" %}
 8595   size(4);
 8596   ins_encode %{
 8597     __ addi($dst$$Register, $src1$$Register, $src2$$constant);
 8598   %}
 8599   ins_pipe(pipe_class_default);
 8600 %}
 8601 
 8602 // Pointer Immediate Addition with 16-bit shifted operand.
 8603 // No constant pool entries required.
 8604 instruct addP_reg_immhi16(iRegPdst dst, iRegP_N2P src1, immL32hi16 src2) %{
 8605   match(Set dst (AddP src1 src2));
 8606 
 8607   format %{ "ADDIS   $dst, $src1, $src2" %}
 8608   size(4);
 8609   ins_encode %{
 8610     __ addis($dst$$Register, $src1$$Register, ($src2$$constant)>>16);
 8611   %}
 8612   ins_pipe(pipe_class_default);
 8613 %}
 8614 
 8615 // Pointer Immediate Addition using prefixed addi
 8616 // No constant pool entries required.
 8617 instruct addP_reg_imm34(iRegPdst dst, iRegP_N2P src1, immL34 src2) %{
 8618   match(Set dst (AddP src1 src2));
 8619   predicate(PowerArchitecturePPC64 >= 10);
 8620   ins_cost(DEFAULT_COST+1);
 8621 
 8622   format %{ "PADDI   $dst, $src1, $src2" %}
 8623   size(8);
 8624   ins_encode %{
 8625     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
 8626     __ paddi($dst$$Register, $src1$$Register, $src2$$constant);
 8627   %}
 8628   ins_pipe(pipe_class_default);
 8629   ins_alignment(2);
 8630 %}
 8631 
 8632 //---------------------
 8633 // Subtraction Instructions
 8634 
 8635 // Register Subtraction
 8636 instruct subI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8637   match(Set dst (SubI src1 src2));
 8638   format %{ "SUBF    $dst, $src2, $src1" %}
 8639   size(4);
 8640   ins_encode %{
 8641     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8642   %}
 8643   ins_pipe(pipe_class_default);
 8644 %}
 8645 
 8646 // Immediate Subtraction
 8647 // The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal).
 8648 // Don't try to use addi with - $src2$$constant since it can overflow when $src2$$constant == minI16.
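      // For example, for $src2$$constant == minI16 (-32768) the negated value 32768
      // no longer fits into a signed 16-bit immediate field.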
 8649 
 8650 // SubI from constant (using subfic).
 8651 instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
 8652   match(Set dst (SubI src1 src2));
 8653   format %{ "SUBI    $dst, $src1, $src2" %}
 8654 
 8655   size(4);
 8656   ins_encode %{
 8657     __ subfic($dst$$Register, $src2$$Register, $src1$$constant);
 8658   %}
 8659   ins_pipe(pipe_class_default);
 8660 %}
 8661 
 8662 // Turn the sign-bit of an integer into a 32-bit mask, 0x0...0 for
 8663 // positive integers and 0xF...F for negative ones.
 8664 instruct signmask32I_regI(iRegIdst dst, iRegIsrc src) %{
 8665   // no match-rule, false predicate
 8666   effect(DEF dst, USE src);
 8667   predicate(false);
 8668 
 8669   format %{ "SRAWI   $dst, $src, #31" %}
 8670   size(4);
 8671   ins_encode %{
 8672     __ srawi($dst$$Register, $src$$Register, 0x1f);
 8673   %}
 8674   ins_pipe(pipe_class_default);
 8675 %}
 8676 
 8677 instruct absI_reg_Ex(iRegIdst dst, iRegIsrc src) %{
 8678   match(Set dst (AbsI src));
 8679   ins_cost(DEFAULT_COST*3);
 8680 
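        // Branch-free abs: tmp1 = src >> 31 (0 for positive, -1 for negative),
        // then dst = (src ^ tmp1) - tmp1.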
 8681   expand %{
 8682     iRegIdst tmp1;
 8683     iRegIdst tmp2;
 8684     signmask32I_regI(tmp1, src);
 8685     xorI_reg_reg(tmp2, tmp1, src);
 8686     subI_reg_reg(dst, tmp2, tmp1);
 8687   %}
 8688 %}
 8689 
 8690 instruct negI_regI(iRegIdst dst, immI_0 zero, iRegIsrc src2) %{
 8691   match(Set dst (SubI zero src2));
 8692   format %{ "NEG     $dst, $src2" %}
 8693   size(4);
 8694   ins_encode %{
 8695     __ neg($dst$$Register, $src2$$Register);
 8696   %}
 8697   ins_pipe(pipe_class_default);
 8698 %}
 8699 
 8700 // Long subtraction
 8701 instruct subL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8702   match(Set dst (SubL src1 src2));
 8703   format %{ "SUBF    $dst, $src2, $src1 \t// long" %}
 8704   size(4);
 8705   ins_encode %{
 8706     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8707   %}
 8708   ins_pipe(pipe_class_default);
 8709 %}
 8710 
 8711 // SubL + convL2I.
 8712 instruct subI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8713   match(Set dst (ConvL2I (SubL src1 src2)));
 8714 
 8715   format %{ "SUBF    $dst, $src2, $src1 \t// long + l2i" %}
 8716   size(4);
 8717   ins_encode %{
 8718     __ subf($dst$$Register, $src2$$Register, $src1$$Register);
 8719   %}
 8720   ins_pipe(pipe_class_default);
 8721 %}
 8722 
 8723 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 8724 // positive longs and 0xF...F for negative ones.
 8725 instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
 8726   // no match-rule, false predicate
 8727   effect(DEF dst, USE src);
 8728   predicate(false);
 8729 
 8730   format %{ "SRADI   $dst, $src, #63" %}
 8731   size(4);
 8732   ins_encode %{
 8733     __ sradi($dst$$Register, $src$$Register, 0x3f);
 8734   %}
 8735   ins_pipe(pipe_class_default);
 8736 %}
 8737 
 8738 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 8739 // positive longs and 0xF...F for negative ones.
 8740 instruct signmask64L_regL(iRegLdst dst, iRegLsrc src) %{
 8741   // no match-rule, false predicate
 8742   effect(DEF dst, USE src);
 8743   predicate(false);
 8744 
 8745   format %{ "SRADI   $dst, $src, #63" %}
 8746   size(4);
 8747   ins_encode %{
 8748     __ sradi($dst$$Register, $src$$Register, 0x3f);
 8749   %}
 8750   ins_pipe(pipe_class_default);
 8751 %}
 8752 
 8753 instruct absL_reg_Ex(iRegLdst dst, iRegLsrc src) %{
 8754   match(Set dst (AbsL src));
 8755   ins_cost(DEFAULT_COST*3);
 8756 
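        // Same branch-free abs idiom as in absI_reg_Ex, using the 64-bit sign mask.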
 8757   expand %{
 8758     iRegLdst tmp1;
 8759     iRegLdst tmp2;
 8760     signmask64L_regL(tmp1, src);
 8761     xorL_reg_reg(tmp2, tmp1, src);
 8762     subL_reg_reg(dst, tmp2, tmp1);
 8763   %}
 8764 %}
 8765 
 8766 // Long negation
 8767 instruct negL_reg_reg(iRegLdst dst, immL_0 zero, iRegLsrc src2) %{
 8768   match(Set dst (SubL zero src2));
 8769   format %{ "NEG     $dst, $src2 \t// long" %}
 8770   size(4);
 8771   ins_encode %{
 8772     __ neg($dst$$Register, $src2$$Register);
 8773   %}
 8774   ins_pipe(pipe_class_default);
 8775 %}
 8776 
 8777 // NegL + ConvL2I.
 8778 instruct negI_con0_regL(iRegIdst dst, immL_0 zero, iRegLsrc src2) %{
 8779   match(Set dst (ConvL2I (SubL zero src2)));
 8780 
 8781   format %{ "NEG     $dst, $src2 \t// long + l2i" %}
 8782   size(4);
 8783   ins_encode %{
 8784     __ neg($dst$$Register, $src2$$Register);
 8785   %}
 8786   ins_pipe(pipe_class_default);
 8787 %}
 8788 
 8789 // Multiplication Instructions
 8790 // Integer Multiplication
 8791 
 8792 // Register Multiplication
 8793 instruct mulI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8794   match(Set dst (MulI src1 src2));
 8795   ins_cost(DEFAULT_COST);
 8796 
 8797   format %{ "MULLW   $dst, $src1, $src2" %}
 8798   size(4);
 8799   ins_encode %{
 8800     __ mullw($dst$$Register, $src1$$Register, $src2$$Register);
 8801   %}
 8802   ins_pipe(pipe_class_default);
 8803 %}
 8804 
 8805 // Immediate Multiplication
 8806 instruct mulI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
 8807   match(Set dst (MulI src1 src2));
 8808   ins_cost(DEFAULT_COST);
 8809 
 8810   format %{ "MULLI   $dst, $src1, $src2" %}
 8811   size(4);
 8812   ins_encode %{
 8813     __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
 8814   %}
 8815   ins_pipe(pipe_class_default);
 8816 %}
 8817 
 8818 instruct mulL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8819   match(Set dst (MulL src1 src2));
 8820   ins_cost(DEFAULT_COST);
 8821 
 8822   format %{ "MULLD   $dst, $src1, $src2 \t// long" %}
 8823   size(4);
 8824   ins_encode %{
 8825     __ mulld($dst$$Register, $src1$$Register, $src2$$Register);
 8826   %}
 8827   ins_pipe(pipe_class_default);
 8828 %}
 8829 
 8830 // Multiply high for optimized long division by constant.
 8831 instruct mulHighL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8832   match(Set dst (MulHiL src1 src2));
 8833   ins_cost(DEFAULT_COST);
 8834 
 8835   format %{ "MULHD   $dst, $src1, $src2 \t// long" %}
 8836   size(4);
 8837   ins_encode %{
 8838     __ mulhd($dst$$Register, $src1$$Register, $src2$$Register);
 8839   %}
 8840   ins_pipe(pipe_class_default);
 8841 %}
 8842 
 8843 // Immediate Multiplication
 8844 instruct mulL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
 8845   match(Set dst (MulL src1 src2));
 8846   ins_cost(DEFAULT_COST);
 8847 
 8848   format %{ "MULLI   $dst, $src1, $src2" %}
 8849   size(4);
 8850   ins_encode %{
 8851     __ mulli($dst$$Register, $src1$$Register, $src2$$constant);
 8852   %}
 8853   ins_pipe(pipe_class_default);
 8854 %}
 8855 
 8856 // Integer Division with Immediate -1: Negate.
 8857 instruct divI_reg_immIvalueMinus1(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
 8858   match(Set dst (DivI src1 src2));
 8859   ins_cost(DEFAULT_COST);
 8860 
 8861   format %{ "NEG     $dst, $src1 \t// /-1" %}
 8862   size(4);
 8863   ins_encode %{
 8864     __ neg($dst$$Register, $src1$$Register);
 8865   %}
 8866   ins_pipe(pipe_class_default);
 8867 %}
 8868 
 8869 // Integer Division with constant, but not -1.
 8870 // We should be able to improve this by checking the type of src2.
 8871 // It might well be that src2 is known to be positive.
 8872 instruct divI_reg_regnotMinus1(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8873   match(Set dst (DivI src1 src2));
 8874   predicate(n->in(2)->find_int_con(-1) != -1); // src2 is a constant, but not -1
 8875   ins_cost(2*DEFAULT_COST);
 8876 
 8877   format %{ "DIVW    $dst, $src1, $src2 \t// /not-1" %}
 8878   size(4);
 8879   ins_encode %{
 8880     __ divw($dst$$Register, $src1$$Register, $src2$$Register);
 8881   %}
 8882   ins_pipe(pipe_class_default);
 8883 %}
 8884 
 8885 instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{
 8886   effect(USE_DEF dst, USE src1, USE crx);
 8887   predicate(false);
 8888 
 8889   ins_variable_size_depending_on_alignment(true);
 8890 
 8891   format %{ "CMOVE   $dst, neg($src1), $crx" %}
 8892   // Worst case is branch + move + stop, no stop without scheduler.
 8893   size(8);
 8894   ins_encode %{
 8895     Label done;
 8896     __ bne($crx$$CondRegister, done);
 8897     __ neg($dst$$Register, $src1$$Register);
 8898     __ bind(done);
 8899   %}
 8900   ins_pipe(pipe_class_default);
 8901 %}
 8902 
 8903 // Integer Division with Registers not containing constants.
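      // The Power ISA leaves the divw result undefined for min_jint / -1, so the
      // expansion special-cases src2 == -1 and uses neg, which yields the min_jint
      // result Java requires.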
 8904 instruct divI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8905   match(Set dst (DivI src1 src2));
 8906   ins_cost(10*DEFAULT_COST);
 8907 
 8908   expand %{
 8909     immI16 imm %{ (int)-1 %}
 8910     flagsReg tmp1;
 8911     cmpI_reg_imm16(tmp1, src2, imm);          // check src2 == -1
 8912     divI_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
 8913     cmovI_bne_negI_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
 8914   %}
 8915 %}
 8916 
 8917 // Long Division with Immediate -1: Negate.
 8918 instruct divL_reg_immLvalueMinus1(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
 8919   match(Set dst (DivL src1 src2));
 8920   ins_cost(DEFAULT_COST);
 8921 
 8922   format %{ "NEG     $dst, $src1 \t// /-1, long" %}
 8923   size(4);
 8924   ins_encode %{
 8925     __ neg($dst$$Register, $src1$$Register);
 8926   %}
 8927   ins_pipe(pipe_class_default);
 8928 %}
 8929 
 8930 // Long Division with constant, but not -1.
 8931 instruct divL_reg_regnotMinus1(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8932   match(Set dst (DivL src1 src2));
 8933   predicate(n->in(2)->find_long_con(-1L) != -1L); // Src2 is a constant, but not -1.
 8934   ins_cost(2*DEFAULT_COST);
 8935 
 8936   format %{ "DIVD    $dst, $src1, $src2 \t// /not-1, long" %}
 8937   size(4);
 8938   ins_encode %{
 8939     __ divd($dst$$Register, $src1$$Register, $src2$$Register);
 8940   %}
 8941   ins_pipe(pipe_class_default);
 8942 %}
 8943 
 8944 instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{
 8945   effect(USE_DEF dst, USE src1, USE crx);
 8946   predicate(false);
 8947 
 8948   ins_variable_size_depending_on_alignment(true);
 8949 
 8950   format %{ "CMOVE   $dst, neg($src1), $crx" %}
 8951   // Worst case is branch + move + stop, no stop without scheduler.
 8952   size(8);
 8953   ins_encode %{
 8954     Label done;
 8955     __ bne($crx$$CondRegister, done);
 8956     __ neg($dst$$Register, $src1$$Register);
 8957     __ bind(done);
 8958   %}
 8959   ins_pipe(pipe_class_default);
 8960 %}
 8961 
 8962 // Long Division with Registers not containing constants.
 8963 instruct divL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8964   match(Set dst (DivL src1 src2));
 8965   ins_cost(10*DEFAULT_COST);
 8966 
 8967   expand %{
 8968     immL16 imm %{ (int)-1 %}
 8969     flagsReg tmp1;
 8970     cmpL_reg_imm16(tmp1, src2, imm);          // check src2 == -1
 8971     divL_reg_regnotMinus1(dst, src1, src2);   // dst = src1 / src2
 8972     cmovL_bne_negL_reg(dst, tmp1, src1);      // cmove dst = neg(src1) if src2 == -1
 8973   %}
 8974 %}
 8975 
 8976 // Integer Remainder with registers.
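      // The remainder is computed as src1 - (src1 / src2) * src2, with the same
      // src2 == -1 special case as for division.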
 8977 instruct modI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 8978   match(Set dst (ModI src1 src2));
 8979   ins_cost(10*DEFAULT_COST);
 8980 
 8981   expand %{
 8982     immI16 imm %{ (int)-1 %}
 8983     flagsReg tmp1;
 8984     iRegIdst tmp2;
 8985     iRegIdst tmp3;
 8986     cmpI_reg_imm16(tmp1, src2, imm);           // check src2 == -1
 8987     divI_reg_regnotMinus1(tmp2, src1, src2);   // tmp2 = src1 / src2
 8988     cmovI_bne_negI_reg(tmp2, tmp1, src1);      // cmove tmp2 = neg(src1) if src2 == -1
 8989     mulI_reg_reg(tmp3, src2, tmp2);            // tmp3 = src2 * tmp2
 8990     subI_reg_reg(dst, src1, tmp3);             // dst = src1 - tmp3
 8991   %}
 8992 %}
 8993 
 8994 // Long Remainder with registers
 8995 instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 8996   match(Set dst (ModL src1 src2));
 8997   ins_cost(10*DEFAULT_COST);
 8998 
 8999   expand %{
 9000     immL16 imm %{ (int)-1 %}
 9001     flagsReg tmp1;
 9002     iRegLdst tmp2;
 9003     iRegLdst tmp3;
 9004     cmpL_reg_imm16(tmp1, src2, imm);             // check src2 == -1
 9005     divL_reg_regnotMinus1(tmp2, src1, src2);     // tmp2 = src1 / src2
 9006     cmovL_bne_negL_reg(tmp2, tmp1, src1);        // cmove tmp2 = neg(src1) if src2 == -1
 9007     mulL_reg_reg(tmp3, src2, tmp2);              // tmp3 = src2 * tmp2
 9008     subL_reg_reg(dst, src1, tmp3);               // dst = src1 - tmp3
 9009   %}
 9010 %}
 9011 
 9012 // Integer Shift Instructions
 9013 
 9014 // Register Shift Left
 9015 
 9016 // Clear the uppermost #mask bits of a register (keep the 64-#mask low-order bits).
 9017 // Used to normalize shift amounts in registers.
 9018 instruct maskI_reg_imm(iRegIdst dst, iRegIsrc src, uimmI6 mask) %{
 9019   // no match-rule, false predicate
 9020   effect(DEF dst, USE src, USE mask);
 9021   predicate(false);
 9022 
 9023   format %{ "MASK    $dst, $src, $mask \t// clear $mask upper bits" %}
 9024   size(4);
 9025   ins_encode %{
 9026     __ clrldi($dst$$Register, $src$$Register, $mask$$constant);
 9027   %}
 9028   ins_pipe(pipe_class_default);
 9029 %}
 9030 
 9031 instruct lShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9032   // no match-rule, false predicate
 9033   effect(DEF dst, USE src1, USE src2);
 9034   predicate(false);
 9035 
 9036   format %{ "SLW     $dst, $src1, $src2" %}
 9037   size(4);
 9038   ins_encode %{
 9039     __ slw($dst$$Register, $src1$$Register, $src2$$Register);
 9040   %}
 9041   ins_pipe(pipe_class_default);
 9042 %}
 9043 
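      // Java defines shift counts modulo the operand width, while slw/sld yield 0
      // for out-of-range shift amounts, so variable shift counts are masked first.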
 9044 instruct lShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9045   match(Set dst (LShiftI src1 src2));
 9046   ins_cost(DEFAULT_COST*2);
 9047   expand %{
 9048     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9049     iRegIdst tmpI;
 9050     maskI_reg_imm(tmpI, src2, mask);
 9051     lShiftI_reg_reg(dst, src1, tmpI);
 9052   %}
 9053 %}
 9054 
 9055 // Register Shift Left Immediate
 9056 instruct lShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9057   match(Set dst (LShiftI src1 src2));
 9058 
 9059   format %{ "SLWI    $dst, $src1, ($src2 & 0x1f)" %}
 9060   size(4);
 9061   ins_encode %{
 9062     __ slwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9063   %}
 9064   ins_pipe(pipe_class_default);
 9065 %}
 9066 
 9067 // AndI with negpow2-constant + LShiftI
 9068 instruct lShiftI_andI_immInegpow2_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
 9069   match(Set dst (LShiftI (AndI src1 src2) src3));
 9070   predicate(UseRotateAndMaskInstructionsPPC64);
 9071 
 9072   format %{ "RLWINM  $dst, lShiftI(AndI($src1, $src2), $src3)" %}
 9073   size(4);
 9074   ins_encode %{
 9075     long src3      = $src3$$constant;
 9076     long maskbits  = src3 + log2i_exact(-(juint)$src2$$constant);
 9077     if (maskbits >= 32) {
 9078       __ li($dst$$Register, 0); // addi
 9079     } else {
 9080       __ rlwinm($dst$$Register, $src1$$Register, src3 & 0x1f, 0, (31-maskbits) & 0x1f);
 9081     }
 9082   %}
 9083   ins_pipe(pipe_class_default);
 9084 %}
 9085 
 9086 // RShiftI + AndI with negpow2-constant + LShiftI
 9087 instruct lShiftI_andI_immInegpow2_rShiftI_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2 src2, uimmI5 src3) %{
 9088   match(Set dst (LShiftI (AndI (RShiftI src1 src3) src2) src3));
 9089   predicate(UseRotateAndMaskInstructionsPPC64);
 9090 
 9091   format %{ "RLWINM  $dst, lShiftI(AndI(RShiftI($src1, $src3), $src2), $src3)" %}
 9092   size(4);
 9093   ins_encode %{
 9094     long src3      = $src3$$constant;
 9095     long maskbits  = src3 + log2i_exact(-(juint)$src2$$constant);
 9096     if (maskbits >= 32) {
 9097       __ li($dst$$Register, 0); // addi
 9098     } else {
 9099       __ rlwinm($dst$$Register, $src1$$Register, 0, 0, (31-maskbits) & 0x1f);
 9100     }
 9101   %}
 9102   ins_pipe(pipe_class_default);
 9103 %}
 9104 
 9105 instruct lShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9106   // no match-rule, false predicate
 9107   effect(DEF dst, USE src1, USE src2);
 9108   predicate(false);
 9109 
 9110   format %{ "SLD     $dst, $src1, $src2" %}
 9111   size(4);
 9112   ins_encode %{
 9113     __ sld($dst$$Register, $src1$$Register, $src2$$Register);
 9114   %}
 9115   ins_pipe(pipe_class_default);
 9116 %}
 9117 
 9118 // Register Shift Left
 9119 instruct lShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9120   match(Set dst (LShiftL src1 src2));
 9121   ins_cost(DEFAULT_COST*2);
 9122   expand %{
 9123     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9124     iRegIdst tmpI;
 9125     maskI_reg_imm(tmpI, src2, mask);
 9126     lShiftL_regL_regI(dst, src1, tmpI);
 9127   %}
 9128 %}
 9129 
 9130 // Register Shift Left Immediate
 9131 instruct lshiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9132   match(Set dst (LShiftL src1 src2));
 9133   format %{ "SLDI    $dst, $src1, ($src2 & 0x3f)" %}
 9134   size(4);
 9135   ins_encode %{
 9136     __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9137   %}
 9138   ins_pipe(pipe_class_default);
 9139 %}
 9140 
 9141 // If we shift by 32 bits or more, we need not convert I2L; the sign extension is shifted out anyway.
 9142 instruct lShiftL_regI_immGE32(iRegLdst dst, iRegIsrc src1, uimmI6_ge32 src2) %{
 9143   match(Set dst (LShiftL (ConvI2L src1) src2));
 9144   ins_cost(DEFAULT_COST);
 9145 
 9146   size(4);
 9147   format %{ "SLDI    $dst, i2l($src1), $src2" %}
 9148   ins_encode %{
 9149     __ sldi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9150   %}
 9151   ins_pipe(pipe_class_default);
 9152 %}
 9153 
 9154 // Shift a positive int to the left.
 9155 // Clrlsldi clears the upper 32 bits and shifts.
 9156 instruct scaledPositiveI2L_lShiftL_convI2L_reg_imm6(iRegLdst dst, iRegIsrc src1, uimmI6 src2) %{
 9157   match(Set dst (LShiftL (ConvI2L src1) src2));
 9158   predicate(((ConvI2LNode*)(_kids[0]->_leaf))->type()->is_long()->is_positive_int());
 9159 
 9160   format %{ "SLDI    $dst, i2l(positive_int($src1)), $src2" %}
 9161   size(4);
 9162   ins_encode %{
 9163     __ clrlsldi($dst$$Register, $src1$$Register, 0x20, $src2$$constant);
 9164   %}
 9165   ins_pipe(pipe_class_default);
 9166 %}
 9167 
 9168 instruct arShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9169   // no match-rule, false predicate
 9170   effect(DEF dst, USE src1, USE src2);
 9171   predicate(false);
 9172 
 9173   format %{ "SRAW    $dst, $src1, $src2" %}
 9174   size(4);
 9175   ins_encode %{
 9176     __ sraw($dst$$Register, $src1$$Register, $src2$$Register);
 9177   %}
 9178   ins_pipe(pipe_class_default);
 9179 %}
 9180 
 9181 // Register Arithmetic Shift Right
 9182 instruct arShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9183   match(Set dst (RShiftI src1 src2));
 9184   ins_cost(DEFAULT_COST*2);
 9185   expand %{
 9186     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9187     iRegIdst tmpI;
 9188     maskI_reg_imm(tmpI, src2, mask);
 9189     arShiftI_reg_reg(dst, src1, tmpI);
 9190   %}
 9191 %}
 9192 
 9193 // Register Arithmetic Shift Right Immediate
 9194 instruct arShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9195   match(Set dst (RShiftI src1 src2));
 9196 
 9197   format %{ "SRAWI   $dst, $src1, ($src2 & 0x1f)" %}
 9198   size(4);
 9199   ins_encode %{
 9200     __ srawi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9201   %}
 9202   ins_pipe(pipe_class_default);
 9203 %}
 9204 
 9205 instruct arShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9206   // no match-rule, false predicate
 9207   effect(DEF dst, USE src1, USE src2);
 9208   predicate(false);
 9209 
 9210   format %{ "SRAD    $dst, $src1, $src2" %}
 9211   size(4);
 9212   ins_encode %{
 9213     __ srad($dst$$Register, $src1$$Register, $src2$$Register);
 9214   %}
 9215   ins_pipe(pipe_class_default);
 9216 %}
 9217 
 9218 // Register Shift Right Arithmetic Long
 9219 instruct arShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9220   match(Set dst (RShiftL src1 src2));
 9221   ins_cost(DEFAULT_COST*2);
 9222 
 9223   expand %{
 9224     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9225     iRegIdst tmpI;
 9226     maskI_reg_imm(tmpI, src2, mask);
 9227     arShiftL_regL_regI(dst, src1, tmpI);
 9228   %}
 9229 %}
 9230 
 9231 // Register Shift Right Arithmetic Long Immediate
 9232 instruct arShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9233   match(Set dst (RShiftL src1 src2));
 9234 
 9235   format %{ "SRADI   $dst, $src1, ($src2 & 0x3f)" %}
 9236   size(4);
 9237   ins_encode %{
 9238     __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9239   %}
 9240   ins_pipe(pipe_class_default);
 9241 %}
 9242 
 9243 // RShiftL + ConvL2I
 9244 instruct convL2I_arShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
 9245   match(Set dst (ConvL2I (RShiftL src1 src2)));
 9246 
 9247   format %{ "SRADI   $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
 9248   size(4);
 9249   ins_encode %{
 9250     __ sradi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9251   %}
 9252   ins_pipe(pipe_class_default);
 9253 %}
 9254 
 9255 instruct urShiftI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9256   // no match-rule, false predicate
 9257   effect(DEF dst, USE src1, USE src2);
 9258   predicate(false);
 9259 
 9260   format %{ "SRW     $dst, $src1, $src2" %}
 9261   size(4);
 9262   ins_encode %{
 9263     __ srw($dst$$Register, $src1$$Register, $src2$$Register);
 9264   %}
 9265   ins_pipe(pipe_class_default);
 9266 %}
 9267 
 9268 // Register Shift Right
 9269 instruct urShiftI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9270   match(Set dst (URShiftI src1 src2));
 9271   ins_cost(DEFAULT_COST*2);
 9272 
 9273   expand %{
 9274     uimmI6 mask %{ 0x3b /* clear 59 bits, keep 5 */ %}
 9275     iRegIdst tmpI;
 9276     maskI_reg_imm(tmpI, src2, mask);
 9277     urShiftI_reg_reg(dst, src1, tmpI);
 9278   %}
 9279 %}
 9280 
 9281 // Register Shift Right Immediate
 9282 instruct urShiftI_reg_imm(iRegIdst dst, iRegIsrc src1, immI src2) %{
 9283   match(Set dst (URShiftI src1 src2));
 9284 
 9285   format %{ "SRWI    $dst, $src1, ($src2 & 0x1f)" %}
 9286   size(4);
 9287   ins_encode %{
 9288     __ srwi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x1f);
 9289   %}
 9290   ins_pipe(pipe_class_default);
 9291 %}
 9292 
 9293 instruct urShiftL_regL_regI(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9294   // no match-rule, false predicate
 9295   effect(DEF dst, USE src1, USE src2);
 9296   predicate(false);
 9297 
 9298   format %{ "SRD     $dst, $src1, $src2" %}
 9299   size(4);
 9300   ins_encode %{
 9301     __ srd($dst$$Register, $src1$$Register, $src2$$Register);
 9302   %}
 9303   ins_pipe(pipe_class_default);
 9304 %}
 9305 
 9306 // Register Shift Right
 9307 instruct urShiftL_regL_regI_Ex(iRegLdst dst, iRegLsrc src1, iRegIsrc src2) %{
 9308   match(Set dst (URShiftL src1 src2));
 9309   ins_cost(DEFAULT_COST*2);
 9310 
 9311   expand %{
 9312     uimmI6 mask %{ 0x3a /* clear 58 bits, keep 6 */ %}
 9313     iRegIdst tmpI;
 9314     maskI_reg_imm(tmpI, src2, mask);
 9315     urShiftL_regL_regI(dst, src1, tmpI);
 9316   %}
 9317 %}
 9318 
 9319 // Register Shift Right Immediate
 9320 instruct urShiftL_regL_immI(iRegLdst dst, iRegLsrc src1, immI src2) %{
 9321   match(Set dst (URShiftL src1 src2));
 9322 
 9323   format %{ "SRDI    $dst, $src1, ($src2 & 0x3f)" %}
 9324   size(4);
 9325   ins_encode %{
 9326     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9327   %}
 9328   ins_pipe(pipe_class_default);
 9329 %}
 9330 
 9331 // URShiftL + ConvL2I.
 9332 instruct convL2I_urShiftL_regL_immI(iRegIdst dst, iRegLsrc src1, immI src2) %{
 9333   match(Set dst (ConvL2I (URShiftL src1 src2)));
 9334 
 9335   format %{ "SRDI    $dst, $src1, ($src2 & 0x3f) \t// long + l2i" %}
 9336   size(4);
 9337   ins_encode %{
 9338     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9339   %}
 9340   ins_pipe(pipe_class_default);
 9341 %}
 9342 
 9343 // Register Shift Right Immediate with a CastP2X
 9344 instruct shrP_convP2X_reg_imm6(iRegLdst dst, iRegP_N2P src1, uimmI6 src2) %{
 9345   match(Set dst (URShiftL (CastP2X src1) src2));
 9346 
 9347   format %{ "SRDI    $dst, $src1, $src2 \t// Cast ptr $src1 to long and shift" %}
 9348   size(4);
 9349   ins_encode %{
 9350     __ srdi($dst$$Register, $src1$$Register, ($src2$$constant) & 0x3f);
 9351   %}
 9352   ins_pipe(pipe_class_default);
 9353 %}
 9354 
 9355 // Bitfield Extract: URShiftI + AndI
 9356 instruct andI_urShiftI_regI_immI_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immI src2, immIpow2minus1 src3) %{
 9357   match(Set dst (AndI (URShiftI src1 src2) src3));
 9358 
 9359   format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// int bitfield extract" %}
 9360   size(4);
 9361   ins_encode %{
 9362     int rshift = ($src2$$constant) & 0x1f;
 9363     int length = log2i_exact((juint)$src3$$constant + 1u);
 9364     if (rshift + length > 32) {
 9365       // if necessary, adjust mask to omit rotated bits.
 9366       length = 32 - rshift;
 9367     }
 9368     __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
 9369   %}
 9370   ins_pipe(pipe_class_default);
 9371 %}
 9372 
 9373 // Bitfield Extract: URShiftL + AndL
 9374 instruct andL_urShiftL_regL_immI_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immI src2, immLpow2minus1 src3) %{
 9375   match(Set dst (AndL (URShiftL src1 src2) src3));
 9376 
 9377   format %{ "EXTRDI  $dst, $src1, shift=$src2, mask=$src3 \t// long bitfield extract" %}
 9378   size(4);
 9379   ins_encode %{
 9380     int rshift  = ($src2$$constant) & 0x3f;
 9381     int length = log2i_exact((julong)$src3$$constant + 1ull);
 9382     if (rshift + length > 64) {
 9383       // if necessary, adjust mask to omit rotated bits.
 9384       length = 64 - rshift;
 9385     }
 9386     __ extrdi($dst$$Register, $src1$$Register, length, 64 - (rshift + length));
 9387   %}
 9388   ins_pipe(pipe_class_default);
 9389 %}
 9390 
 9391 instruct sxtI_reg(iRegIdst dst, iRegIsrc src) %{
 9392   match(Set dst (ConvL2I (ConvI2L src)));
 9393 
 9394   format %{ "EXTSW   $dst, $src \t// int->int" %}
 9395   size(4);
 9396   ins_encode %{
 9397     __ extsw($dst$$Register, $src$$Register);
 9398   %}
 9399   ins_pipe(pipe_class_default);
 9400 %}
 9401 
 9402 //----------Rotate Instructions------------------------------------------------
 9403 
 9404 // Rotate Left by 8-bit immediate
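      // Matched only when lshift + rshift is a multiple of 32 (see the predicate),
      // i.e. when the two shifts combine into a true rotate.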
 9405 instruct rotlI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 lshift, immI8 rshift) %{
 9406   match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift)));
 9407   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 9408 
 9409   format %{ "ROTLWI  $dst, $src, $lshift" %}
 9410   size(4);
 9411   ins_encode %{
 9412     __ rotlwi($dst$$Register, $src$$Register, $lshift$$constant);
 9413   %}
 9414   ins_pipe(pipe_class_default);
 9415 %}
 9416 
 9417 // Rotate Right by 8-bit immediate
 9418 instruct rotrI_reg_immi8(iRegIdst dst, iRegIsrc src, immI8 rshift, immI8 lshift) %{
 9419   match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
 9420   predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 9421 
 9422   format %{ "ROTRWI  $dst, $src, $rshift" %}
 9423   size(4);
 9424   ins_encode %{
 9425     __ rotrwi($dst$$Register, $src$$Register, $rshift$$constant);
 9426   %}
 9427   ins_pipe(pipe_class_default);
 9428 %}
 9429 
 9430 //----------Floating Point Arithmetic Instructions-----------------------------
 9431 
 9432 // Add float single precision
 9433 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
 9434   match(Set dst (AddF src1 src2));
 9435 
 9436   format %{ "FADDS   $dst, $src1, $src2" %}
 9437   size(4);
 9438   ins_encode %{
 9439     __ fadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9440   %}
 9441   ins_pipe(pipe_class_default);
 9442 %}
 9443 
 9444 // Add float double precision
 9445 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
 9446   match(Set dst (AddD src1 src2));
 9447 
 9448   format %{ "FADD    $dst, $src1, $src2" %}
 9449   size(4);
 9450   ins_encode %{
 9451     __ fadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9452   %}
 9453   ins_pipe(pipe_class_default);
 9454 %}
 9455 
 9456 // Sub float single precision
 9457 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
 9458   match(Set dst (SubF src1 src2));
 9459 
 9460   format %{ "FSUBS   $dst, $src1, $src2" %}
 9461   size(4);
 9462   ins_encode %{
 9463     __ fsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9464   %}
 9465   ins_pipe(pipe_class_default);
 9466 %}
 9467 
 9468 // Sub float double precision
 9469 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
 9470   match(Set dst (SubD src1 src2));
 9471   format %{ "FSUB    $dst, $src1, $src2" %}
 9472   size(4);
 9473   ins_encode %{
 9474     __ fsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9475   %}
 9476   ins_pipe(pipe_class_default);
 9477 %}
 9478 
 9479 // Mul float single precision
 9480 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
 9481   match(Set dst (MulF src1 src2));
 9482   format %{ "FMULS   $dst, $src1, $src2" %}
 9483   size(4);
 9484   ins_encode %{
 9485     __ fmuls($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9486   %}
 9487   ins_pipe(pipe_class_default);
 9488 %}
 9489 
 9490 // Mul float double precision
 9491 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
 9492   match(Set dst (MulD src1 src2));
 9493   format %{ "FMUL    $dst, $src1, $src2" %}
 9494   size(4);
 9495   ins_encode %{
 9496     __ fmul($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9497   %}
 9498   ins_pipe(pipe_class_default);
 9499 %}
 9500 
 9501 // Div float single precision
 9502 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
 9503   match(Set dst (DivF src1 src2));
 9504   format %{ "FDIVS   $dst, $src1, $src2" %}
 9505   size(4);
 9506   ins_encode %{
 9507     __ fdivs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9508   %}
 9509   ins_pipe(pipe_class_default);
 9510 %}
 9511 
 9512 // Div float double precision
 9513 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
 9514   match(Set dst (DivD src1 src2));
 9515   format %{ "FDIV    $dst, $src1, $src2" %}
 9516   size(4);
 9517   ins_encode %{
 9518     __ fdiv($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister);
 9519   %}
 9520   ins_pipe(pipe_class_default);
 9521 %}
 9522 
 9523 // Absolute float single precision
 9524 instruct absF_reg(regF dst, regF src) %{
 9525   match(Set dst (AbsF src));
 9526   format %{ "FABS    $dst, $src \t// float" %}
 9527   size(4);
 9528   ins_encode %{
 9529     __ fabs($dst$$FloatRegister, $src$$FloatRegister);
 9530   %}
 9531   ins_pipe(pipe_class_default);
 9532 %}
 9533 
 9534 // Absolute float double precision
 9535 instruct absD_reg(regD dst, regD src) %{
 9536   match(Set dst (AbsD src));
 9537   format %{ "FABS    $dst, $src \t// double" %}
 9538   size(4);
 9539   ins_encode %{
 9540     __ fabs($dst$$FloatRegister, $src$$FloatRegister);
 9541   %}
 9542   ins_pipe(pipe_class_default);
 9543 %}
 9544 
 9545 instruct negF_reg(regF dst, regF src) %{
 9546   match(Set dst (NegF src));
 9547   format %{ "FNEG    $dst, $src \t// float" %}
 9548   size(4);
 9549   ins_encode %{
 9550     __ fneg($dst$$FloatRegister, $src$$FloatRegister);
 9551   %}
 9552   ins_pipe(pipe_class_default);
 9553 %}
 9554 
 9555 instruct negD_reg(regD dst, regD src) %{
 9556   match(Set dst (NegD src));
 9557   format %{ "FNEG    $dst, $src \t// double" %}
 9558   size(4);
 9559   ins_encode %{
 9560     __ fneg($dst$$FloatRegister, $src$$FloatRegister);
 9561   %}
 9562   ins_pipe(pipe_class_default);
 9563 %}
 9564 
 9565 // AbsF + NegF.
 9566 instruct negF_absF_reg(regF dst, regF src) %{
 9567   match(Set dst (NegF (AbsF src)));
 9568   format %{ "FNABS   $dst, $src \t// float" %}
 9569   size(4);
 9570   ins_encode %{
 9571     __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
 9572   %}
 9573   ins_pipe(pipe_class_default);
 9574 %}
 9575 
 9576 // AbsD + NegD.
 9577 instruct negD_absD_reg(regD dst, regD src) %{
 9578   match(Set dst (NegD (AbsD src)));
 9579   format %{ "FNABS   $dst, $src \t// double" %}
 9580   size(4);
 9581   ins_encode %{
 9582     __ fnabs($dst$$FloatRegister, $src$$FloatRegister);
 9583   %}
 9584   ins_pipe(pipe_class_default);
 9585 %}
 9586 
 9587 // VM_Version::has_fsqrt() decides if this node will be used.
 9588 // Sqrt float double precision
 9589 instruct sqrtD_reg(regD dst, regD src) %{
 9590   match(Set dst (SqrtD src));
 9591   format %{ "FSQRT   $dst, $src" %}
 9592   size(4);
 9593   ins_encode %{
 9594     __ fsqrt($dst$$FloatRegister, $src$$FloatRegister);
 9595   %}
 9596   ins_pipe(pipe_class_default);
 9597 %}
 9598 
 9599 // Single-precision sqrt.
 9600 instruct sqrtF_reg(regF dst, regF src) %{
 9601   match(Set dst (SqrtF src));
 9602   predicate(VM_Version::has_fsqrts());
 9603   ins_cost(DEFAULT_COST);
 9604 
 9605   format %{ "FSQRTS  $dst, $src" %}
 9606   size(4);
 9607   ins_encode %{
 9608     __ fsqrts($dst$$FloatRegister, $src$$FloatRegister);
 9609   %}
 9610   ins_pipe(pipe_class_default);
 9611 %}
 9612 
 9613 instruct roundDouble_nop(regD dst) %{
 9614   match(Set dst (RoundDouble dst));
 9615   ins_cost(0);
 9616 
 9617   format %{ " -- \t// RoundDouble not needed - empty" %}
 9618   size(0);
 9619   // PPC results are already "rounded" (i.e., normal-format IEEE).
 9620   ins_encode( /*empty*/ );
 9621   ins_pipe(pipe_class_default);
 9622 %}
 9623 
 9624 instruct roundFloat_nop(regF dst) %{
 9625   match(Set dst (RoundFloat dst));
 9626   ins_cost(0);
 9627 
 9628   format %{ " -- \t// RoundFloat not needed - empty" %}
 9629   size(0);
 9630   // PPC results are already "rounded" (i.e., normal-format IEEE).
 9631   ins_encode( /*empty*/ );
 9632   ins_pipe(pipe_class_default);
 9633 %}
 9634 
 9635 
 9636 // Multiply-Accumulate
 9637 // src1 * src2 + src3
 9638 instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9639   match(Set dst (FmaF src3 (Binary src1 src2)));
 9640 
 9641   format %{ "FMADDS  $dst, $src1, $src2, $src3" %}
 9642   size(4);
 9643   ins_encode %{
 9644     __ fmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9645   %}
 9646   ins_pipe(pipe_class_default);
 9647 %}
 9648 
 9649 // src1 * src2 + src3
 9650 instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9651   match(Set dst (FmaD src3 (Binary src1 src2)));
 9652 
 9653   format %{ "FMADD   $dst, $src1, $src2, $src3" %}
 9654   size(4);
 9655   ins_encode %{
 9656     __ fmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9657   %}
 9658   ins_pipe(pipe_class_default);
 9659 %}
 9660 
 9661 // -src1 * src2 + src3 = -(src1*src2-src3)
 9662 instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9663   match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
 9664   match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
 9665 
 9666   format %{ "FNMSUBS $dst, $src1, $src2, $src3" %}
 9667   size(4);
 9668   ins_encode %{
 9669     __ fnmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9670   %}
 9671   ins_pipe(pipe_class_default);
 9672 %}
 9673 
 9674 // -src1 * src2 + src3 = -(src1*src2-src3)
 9675 instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9676   match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
 9677   match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
 9678 
 9679   format %{ "FNMSUB  $dst, $src1, $src2, $src3" %}
 9680   size(4);
 9681   ins_encode %{
 9682     __ fnmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9683   %}
 9684   ins_pipe(pipe_class_default);
 9685 %}
 9686 
 9687 // -src1 * src2 - src3 = -(src1*src2+src3)
 9688 instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9689   match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
 9690   match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
 9691 
 9692   format %{ "FNMADDS $dst, $src1, $src2, $src3" %}
 9693   size(4);
 9694   ins_encode %{
 9695     __ fnmadds($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9696   %}
 9697   ins_pipe(pipe_class_default);
 9698 %}
 9699 
 9700 // -src1 * src2 - src3 = -(src1*src2+src3)
 9701 instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9702   match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
 9703   match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
 9704 
 9705   format %{ "FNMADD  $dst, $src1, $src2, $src3" %}
 9706   size(4);
 9707   ins_encode %{
 9708     __ fnmadd($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9709   %}
 9710   ins_pipe(pipe_class_default);
 9711 %}
 9712 
 9713 // src1 * src2 - src3
 9714 instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
 9715   match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
 9716 
 9717   format %{ "FMSUBS  $dst, $src1, $src2, $src3" %}
 9718   size(4);
 9719   ins_encode %{
 9720     __ fmsubs($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9721   %}
 9722   ins_pipe(pipe_class_default);
 9723 %}
 9724 
 9725 // src1 * src2 - src3
 9726 instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
 9727   match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
 9728 
 9729   format %{ "FMSUB   $dst, $src1, $src2, $src3" %}
 9730   size(4);
 9731   ins_encode %{
 9732     __ fmsub($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
 9733   %}
 9734   ins_pipe(pipe_class_default);
 9735 %}
 9736 
 9737 
 9738 //----------Logical Instructions-----------------------------------------------
 9739 
 9740 // And Instructions
 9741 
 9742 // Register And
 9743 instruct andI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9744   match(Set dst (AndI src1 src2));
 9745   format %{ "AND     $dst, $src1, $src2" %}
 9746   size(4);
 9747   ins_encode %{
 9748     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
 9749   %}
 9750   ins_pipe(pipe_class_default);
 9751 %}
 9752 
 9753 // Left shifted Immediate And
 9754 instruct andI_reg_immIhi16(iRegIdst dst, iRegIsrc src1, immIhi16  src2, flagsRegCR0 cr0) %{
 9755   match(Set dst (AndI src1 src2));
 9756   effect(KILL cr0);
 9757   format %{ "ANDIS   $dst, $src1, $src2.hi" %}
 9758   size(4);
 9759   ins_encode %{
 9760     __ andis_($dst$$Register, $src1$$Register, (int)((unsigned short)(($src2$$constant & 0xFFFF0000) >> 16)));
 9761   %}
 9762   ins_pipe(pipe_class_default);
 9763 %}
 9764 
 9765 // Immediate And
 9766 instruct andI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2, flagsRegCR0 cr0) %{
 9767   match(Set dst (AndI src1 src2));
 9768   effect(KILL cr0);
 9769 
 9770   format %{ "ANDI    $dst, $src1, $src2" %}
 9771   size(4);
 9772   ins_encode %{
 9773     // FIXME: avoid andi_ ?
 9774     __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
 9775   %}
 9776   ins_pipe(pipe_class_default);
 9777 %}
 9778 
 9779 // Immediate And where the immediate is a negative power of 2.
 9780 instruct andI_reg_immInegpow2(iRegIdst dst, iRegIsrc src1, immInegpow2 src2) %{
 9781   match(Set dst (AndI src1 src2));
 9782   format %{ "ANDWI   $dst, $src1, $src2" %}
 9783   size(4);
 9784   ins_encode %{
 9785     __ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(juint)$src2$$constant));
 9786   %}
 9787   ins_pipe(pipe_class_default);
 9788 %}
 9789 
 9790 instruct andI_reg_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immIpow2minus1 src2) %{
 9791   match(Set dst (AndI src1 src2));
 9792   format %{ "ANDWI   $dst, $src1, $src2" %}
 9793   size(4);
 9794   ins_encode %{
 9795     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((juint)$src2$$constant + 1u));
 9796   %}
 9797   ins_pipe(pipe_class_default);
 9798 %}
 9799 
 9800 instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %{
 9801   match(Set dst (AndI src1 src2));
 9802   predicate(UseRotateAndMaskInstructionsPPC64);
 9803   format %{ "ANDWI   $dst, $src1, $src2" %}
 9804   size(4);
 9805   ins_encode %{
 9806     int bitpos = 31 - log2i_exact((juint)$src2$$constant);
 9807     __ rlwinm($dst$$Register, $src1$$Register, 0, bitpos, bitpos);
 9808   %}
 9809   ins_pipe(pipe_class_default);
 9810 %}
 9811 
 9812 // Register And Long
 9813 instruct andL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9814   match(Set dst (AndL src1 src2));
 9815   ins_cost(DEFAULT_COST);
 9816 
 9817   format %{ "AND     $dst, $src1, $src2 \t// long" %}
 9818   size(4);
 9819   ins_encode %{
 9820     __ andr($dst$$Register, $src1$$Register, $src2$$Register);
 9821   %}
 9822   ins_pipe(pipe_class_default);
 9823 %}
 9824 
 9825 // Immediate And long
 9826 instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
 9827   match(Set dst (AndL src1 src2));
 9828   effect(KILL cr0);
 9829 
 9830   format %{ "ANDI    $dst, $src1, $src2 \t// long" %}
 9831   size(4);
 9832   ins_encode %{
 9833     // FIXME: avoid andi_ ?
 9834     __ andi_($dst$$Register, $src1$$Register, $src2$$constant);
 9835   %}
 9836   ins_pipe(pipe_class_default);
 9837 %}
 9838 
 9839 // Immediate And Long where the immediate is a negative power of 2.
 9840 instruct andL_reg_immLnegpow2(iRegLdst dst, iRegLsrc src1, immLnegpow2 src2) %{
 9841   match(Set dst (AndL src1 src2));
 9842   format %{ "ANDDI   $dst, $src1, $src2" %}
 9843   size(4);
 9844   ins_encode %{
 9845     __ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(julong)$src2$$constant));
 9846   %}
 9847   ins_pipe(pipe_class_default);
 9848 %}
 9849 
 9850 instruct andL_reg_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
 9851   match(Set dst (AndL src1 src2));
 9852   format %{ "ANDDI   $dst, $src1, $src2" %}
 9853   size(4);
 9854   ins_encode %{
 9855     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
 9856   %}
 9857   ins_pipe(pipe_class_default);
 9858 %}
 9859 
 9860 // AndL + ConvL2I.
 9861 instruct convL2I_andL_reg_immLpow2minus1(iRegIdst dst, iRegLsrc src1, immLpow2minus1 src2) %{
 9862   match(Set dst (ConvL2I (AndL src1 src2)));
 9863   ins_cost(DEFAULT_COST);
 9864 
 9865   format %{ "ANDDI   $dst, $src1, $src2 \t// long + l2i" %}
 9866   size(4);
 9867   ins_encode %{
 9868     __ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
 9869   %}
 9870   ins_pipe(pipe_class_default);
 9871 %}
 9872 
 9873 // Or Instructions
 9874 
 9875 // Register Or
 9876 instruct orI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9877   match(Set dst (OrI src1 src2));
 9878   format %{ "OR      $dst, $src1, $src2" %}
 9879   size(4);
 9880   ins_encode %{
 9881     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9882   %}
 9883   ins_pipe(pipe_class_default);
 9884 %}
 9885 
 9886 // Expand does not work with the above instruct (??); this duplicate without a match rule is used by the expand rule below.
 9887 instruct orI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9888   // no match-rule
 9889   effect(DEF dst, USE src1, USE src2);
 9890   format %{ "OR      $dst, $src1, $src2" %}
 9891   size(4);
 9892   ins_encode %{
 9893     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9894   %}
 9895   ins_pipe(pipe_class_default);
 9896 %}
 9897 
 9898 instruct tree_orI_orI_orI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 9899   match(Set dst (OrI (OrI (OrI src1 src2) src3) src4));
 9900   ins_cost(DEFAULT_COST*3);
 9901 
 9902   expand %{
 9903     // FIXME: we should do this in the ideal world.
 9904     iRegIdst tmp1;
 9905     iRegIdst tmp2;
 9906     orI_reg_reg(tmp1, src1, src2);
 9907     orI_reg_reg_2(tmp2, src3, src4); // Adlc complains about orI_reg_reg.
 9908     orI_reg_reg(dst, tmp1, tmp2);
 9909   %}
 9910 %}
 9911 
 9912 // Immediate Or
 9913 instruct orI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
 9914   match(Set dst (OrI src1 src2));
 9915   format %{ "ORI     $dst, $src1, $src2" %}
 9916   size(4);
 9917   ins_encode %{
 9918     __ ori($dst$$Register, $src1$$Register, ($src2$$constant) & 0xFFFF);
 9919   %}
 9920   ins_pipe(pipe_class_default);
 9921 %}
 9922 
 9923 // Register Or Long
 9924 instruct orL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9925   match(Set dst (OrL src1 src2));
 9926   ins_cost(DEFAULT_COST);
 9927 
 9928   size(4);
 9929   format %{ "OR      $dst, $src1, $src2 \t// long" %}
 9930   ins_encode %{
 9931     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9932   %}
 9933   ins_pipe(pipe_class_default);
 9934 %}
 9935 
 9936 // OrL + ConvL2I.
 9937 instruct orI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
 9938   match(Set dst (ConvL2I (OrL src1 src2)));
 9939   ins_cost(DEFAULT_COST);
 9940 
 9941   format %{ "OR      $dst, $src1, $src2 \t// long + l2i" %}
 9942   size(4);
 9943   ins_encode %{
 9944     __ or_unchecked($dst$$Register, $src1$$Register, $src2$$Register);
 9945   %}
 9946   ins_pipe(pipe_class_default);
 9947 %}
 9948 
 9949 // Immediate Or long
 9950 instruct orL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 con) %{
 9951   match(Set dst (OrL src1 con));
 9952   ins_cost(DEFAULT_COST);
 9953 
 9954   format %{ "ORI     $dst, $src1, $con \t// long" %}
 9955   size(4);
 9956   ins_encode %{
 9957     __ ori($dst$$Register, $src1$$Register, ($con$$constant) & 0xFFFF);
 9958   %}
 9959   ins_pipe(pipe_class_default);
 9960 %}
 9961 
 9962 // Xor Instructions
 9963 
 9964 // Register Xor
 9965 instruct xorI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9966   match(Set dst (XorI src1 src2));
 9967   format %{ "XOR     $dst, $src1, $src2" %}
 9968   size(4);
 9969   ins_encode %{
 9970     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9971   %}
 9972   ins_pipe(pipe_class_default);
 9973 %}
 9974 
 9975 // Expand does not work with the above instruct (??); this duplicate without a match rule is used by the expand rule below.
 9976 instruct xorI_reg_reg_2(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
 9977   // no match-rule
 9978   effect(DEF dst, USE src1, USE src2);
 9979   format %{ "XOR     $dst, $src1, $src2" %}
 9980   size(4);
 9981   ins_encode %{
 9982     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
 9983   %}
 9984   ins_pipe(pipe_class_default);
 9985 %}
 9986 
 9987 instruct tree_xorI_xorI_xorI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, iRegIsrc src3, iRegIsrc src4) %{
 9988   match(Set dst (XorI (XorI (XorI src1 src2) src3) src4));
 9989   ins_cost(DEFAULT_COST*3);
 9990 
 9991   expand %{
 9992     // FIXME: we should do this in the ideal world.
 9993     iRegIdst tmp1;
 9994     iRegIdst tmp2;
 9995     xorI_reg_reg(tmp1, src1, src2);
 9996     xorI_reg_reg_2(tmp2, src3, src4); // Adlc complains about xorI_reg_reg.
 9997     xorI_reg_reg(dst, tmp1, tmp2);
 9998   %}
 9999 %}
10000 
10001 // Immediate Xor
10002 instruct xorI_reg_uimm16(iRegIdst dst, iRegIsrc src1, uimmI16 src2) %{
10003   match(Set dst (XorI src1 src2));
10004   format %{ "XORI    $dst, $src1, $src2" %}
10005   size(4);
10006   ins_encode %{
10007     __ xori($dst$$Register, $src1$$Register, $src2$$constant);
10008   %}
10009   ins_pipe(pipe_class_default);
10010 %}
10011 
10012 // Register Xor Long
10013 instruct xorL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
10014   match(Set dst (XorL src1 src2));
10015   ins_cost(DEFAULT_COST);
10016 
10017   format %{ "XOR     $dst, $src1, $src2 \t// long" %}
10018   size(4);
10019   ins_encode %{
10020     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
10021   %}
10022   ins_pipe(pipe_class_default);
10023 %}
10024 
10025 // XorL + ConvL2I.
10026 instruct xorI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{
10027   match(Set dst (ConvL2I (XorL src1 src2)));
10028   ins_cost(DEFAULT_COST);
10029 
10030   format %{ "XOR     $dst, $src1, $src2 \t// long + l2i" %}
10031   size(4);
10032   ins_encode %{
10033     __ xorr($dst$$Register, $src1$$Register, $src2$$Register);
10034   %}
10035   ins_pipe(pipe_class_default);
10036 %}
10037 
10038 // Immediate Xor Long
10039 instruct xorL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2) %{
10040   match(Set dst (XorL src1 src2));
10041   ins_cost(DEFAULT_COST);
10042 
10043   format %{ "XORI    $dst, $src1, $src2 \t// long" %}
10044   size(4);
10045   ins_encode %{
10046     __ xori($dst$$Register, $src1$$Register, $src2$$constant);
10047   %}
10048   ins_pipe(pipe_class_default);
10049 %}
10050 
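      // Bitwise not: x ^ -1 == ~x, encoded as nor(x, x).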
10051 instruct notI_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2) %{
10052   match(Set dst (XorI src1 src2));
10053   ins_cost(DEFAULT_COST);
10054 
10055   format %{ "NOT     $dst, $src1 ($src2)" %}
10056   size(4);
10057   ins_encode %{
10058     __ nor($dst$$Register, $src1$$Register, $src1$$Register);
10059   %}
10060   ins_pipe(pipe_class_default);
10061 %}
10062 
10063 instruct notL_reg(iRegLdst dst, iRegLsrc src1, immL_minus1 src2) %{
10064   match(Set dst (XorL src1 src2));
10065   ins_cost(DEFAULT_COST);
10066 
10067   format %{ "NOT     $dst, $src1 ($src2) \t// long" %}
10068   size(4);
10069   ins_encode %{
10070     __ nor($dst$$Register, $src1$$Register, $src1$$Register);
10071   %}
10072   ins_pipe(pipe_class_default);
10073 %}
10074 
10075 // And-complement
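      // (src1 ^ -1) & src3 == src3 & ~src1, which maps to a single andc.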
10076 instruct andcI_reg_reg(iRegIdst dst, iRegIsrc src1, immI_minus1 src2, iRegIsrc src3) %{
10077   match(Set dst (AndI (XorI src1 src2) src3));
10078   ins_cost(DEFAULT_COST);
10079 
10080   format %{ "ANDW    $dst, xori($src1, $src2), $src3" %}
10081   size(4);
10082   ins_encode( enc_andc(dst, src3, src1) );
10083   ins_pipe(pipe_class_default);
10084 %}
10085 
10086 // And-complement
10087 instruct andcL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
10088   // no match-rule, false predicate
10089   effect(DEF dst, USE src1, USE src2);
10090   predicate(false);
10091 
10092   format %{ "ANDC    $dst, $src1, $src2" %}
10093   size(4);
10094   ins_encode %{
10095     __ andc($dst$$Register, $src1$$Register, $src2$$Register);
10096   %}
10097   ins_pipe(pipe_class_default);
10098 %}
10099 
10100 //----------Moves between int/long and float/double----------------------------
10101 //
10102 // The following rules move values from int/long registers/stack-locations
10103 // to float/double registers/stack-locations and vice versa, without doing any
10104 // conversions. These rules are used to implement the bit-conversion methods
10105 // of java.lang.Float etc., e.g.
10106 //   int   floatToIntBits(float value)
10107 //   float intBitsToFloat(int bits)
10108 //
10109 // Notes on the implementation on ppc64:
10110 // For Power7 and earlier, the rules are limited to those which move between a
10111 // register and a stack-location, because we always have to go through memory
10112 // when moving between a float register and an integer register.
10113 // This restriction is removed in Power8 with the introduction of the mtfprd
10114 // and mffprd instructions.
10115 
10116 instruct moveL2D_reg(regD dst, iRegLsrc src) %{
10117   match(Set dst (MoveL2D src));
10118   predicate(VM_Version::has_mtfprd());
10119 
10120   format %{ "MTFPRD  $dst, $src" %}
10121   size(4);
10122   ins_encode %{
10123     __ mtfprd($dst$$FloatRegister, $src$$Register);
10124   %}
10125   ins_pipe(pipe_class_default);
10126 %}
10127 
10128 instruct moveI2D_reg(regD dst, iRegIsrc src) %{
10129   // no match-rule, false predicate
10130   effect(DEF dst, USE src);
10131   predicate(false);
10132 
10133   format %{ "MTFPRWA $dst, $src" %}
10134   size(4);
10135   ins_encode %{
10136     __ mtfprwa($dst$$FloatRegister, $src$$Register);
10137   %}
10138   ins_pipe(pipe_class_default);
10139 %}
10140 
10141 //---------- Chain stack slots between similar types --------
10142 
10143 // These are needed so that the rules below can match.
10144 
10145 // Load integer from stack slot
10146 instruct stkI_to_regI(iRegIdst dst, stackSlotI src) %{
10147   match(Set dst src);
10148   ins_cost(MEMORY_REF_COST);
10149 
10150   format %{ "LWZ     $dst, $src" %}
10151   size(4);
10152   ins_encode( enc_lwz(dst, src) );
10153   ins_pipe(pipe_class_memory);
10154 %}
10155 
10156 // Store integer to stack slot
10157 instruct regI_to_stkI(stackSlotI dst, iRegIsrc src) %{
10158   match(Set dst src);
10159   ins_cost(MEMORY_REF_COST);
10160 
10161   format %{ "STW     $src, $dst \t// stk" %}
10162   size(4);
10163   ins_encode( enc_stw(src, dst) ); // rs=rt
10164   ins_pipe(pipe_class_memory);
10165 %}
10166 
10167 // Load long from stack slot
10168 instruct stkL_to_regL(iRegLdst dst, stackSlotL src) %{
10169   match(Set dst src);
10170   ins_cost(MEMORY_REF_COST);
10171 
10172   format %{ "LD      $dst, $src \t// long" %}
10173   size(4);
10174   ins_encode( enc_ld(dst, src) );
10175   ins_pipe(pipe_class_memory);
10176 %}
10177 
10178 // Store long to stack slot
10179 instruct regL_to_stkL(stackSlotL dst, iRegLsrc src) %{
10180   match(Set dst src);
10181   ins_cost(MEMORY_REF_COST);
10182 
10183   format %{ "STD     $src, $dst \t// long" %}
10184   size(4);
10185   ins_encode( enc_std(src, dst) ); // rs=rt
10186   ins_pipe(pipe_class_memory);
10187 %}
10188 
10189 //----------Moves between int and float
10190 
10191 // Move float value from float stack-location to integer register.
10192 instruct moveF2I_stack_reg(iRegIdst dst, stackSlotF src) %{
10193   match(Set dst (MoveF2I src));
10194   ins_cost(MEMORY_REF_COST);
10195 
10196   format %{ "LWZ     $dst, $src \t// MoveF2I" %}
10197   size(4);
10198   ins_encode( enc_lwz(dst, src) );
10199   ins_pipe(pipe_class_memory);
10200 %}
10201 
10202 // Move float value from float register to integer stack-location.
10203 instruct moveF2I_reg_stack(stackSlotI dst, regF src) %{
10204   match(Set dst (MoveF2I src));
10205   ins_cost(MEMORY_REF_COST);
10206 
10207   format %{ "STFS    $src, $dst \t// MoveF2I" %}
10208   size(4);
10209   ins_encode( enc_stfs(src, dst) );
10210   ins_pipe(pipe_class_memory);
10211 %}
10212 
10213 // Move integer value from integer stack-location to float register.
10214 instruct moveI2F_stack_reg(regF dst, stackSlotI src) %{
10215   match(Set dst (MoveI2F src));
10216   ins_cost(MEMORY_REF_COST);
10217 
10218   format %{ "LFS     $dst, $src \t// MoveI2F" %}
10219   size(4);
10220   ins_encode %{
10221     int Idisp = $src$$disp + frame_slots_bias($src$$base, ra_);
10222     __ lfs($dst$$FloatRegister, Idisp, $src$$base$$Register);
10223   %}
10224   ins_pipe(pipe_class_memory);
10225 %}
10226 
10227 // Move integer value from integer register to float stack-location.
10228 instruct moveI2F_reg_stack(stackSlotF dst, iRegIsrc src) %{
10229   match(Set dst (MoveI2F src));
10230   ins_cost(MEMORY_REF_COST);
10231 
10232   format %{ "STW     $src, $dst \t// MoveI2F" %}
10233   size(4);
10234   ins_encode( enc_stw(src, dst) );
10235   ins_pipe(pipe_class_memory);
10236 %}
10237 
10238 //----------Moves between long and float
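// moveF2L_reg_stack below has no match rule of its own; it is only referenced
// by the ConvF2I/ConvF2L expansions further down, which spill the raw
// FCTIWZ/FCTIDZ result to a long stack slot before the NaN cmove.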
10239 
10240 instruct moveF2L_reg_stack(stackSlotL dst, regF src) %{
10241   // no match-rule, false predicate
10242   effect(DEF dst, USE src);
10243   predicate(false);
10244 
10245   format %{ "storeD  $src, $dst \t// STACK" %}
10246   size(4);
10247   ins_encode( enc_stfd(src, dst) );
10248   ins_pipe(pipe_class_default);
10249 %}
10250 
10251 //----------Moves between long and double
10252 
10253 // Move double value from double stack-location to long register.
10254 instruct moveD2L_stack_reg(iRegLdst dst, stackSlotD src) %{
10255   match(Set dst (MoveD2L src));
10256   ins_cost(MEMORY_REF_COST);
10257   size(4);
10258   format %{ "LD      $dst, $src \t// MoveD2L" %}
10259   ins_encode( enc_ld(dst, src) );
10260   ins_pipe(pipe_class_memory);
10261 %}
10262 
10263 // Move double value from double register to long stack-location.
10264 instruct moveD2L_reg_stack(stackSlotL dst, regD src) %{
10265   match(Set dst (MoveD2L src));
10266   effect(DEF dst, USE src);
10267   ins_cost(MEMORY_REF_COST);
10268 
10269   format %{ "STFD    $src, $dst \t// MoveD2L" %}
10270   size(4);
10271   ins_encode( enc_stfd(src, dst) );
10272   ins_pipe(pipe_class_memory);
10273 %}
10274 
10275 // Move long value from long stack-location to double register.
10276 instruct moveL2D_stack_reg(regD dst, stackSlotL src) %{
10277   match(Set dst (MoveL2D src));
10278   ins_cost(MEMORY_REF_COST);
10279 
10280   format %{ "LFD     $dst, $src \t// MoveL2D" %}
10281   size(4);
10282   ins_encode( enc_lfd(dst, src) );
10283   ins_pipe(pipe_class_memory);
10284 %}
10285 
10286 // Move long value from long register to double stack-location.
10287 instruct moveL2D_reg_stack(stackSlotD dst, iRegLsrc src) %{
10288   match(Set dst (MoveL2D src));
10289   ins_cost(MEMORY_REF_COST);
10290 
10291   format %{ "STD     $src, $dst \t// MoveL2D" %}
10292   size(4);
10293   ins_encode( enc_std(src, dst) );
10294   ins_pipe(pipe_class_memory);
10295 %}
10296 
10297 //----------Register Move Instructions-----------------------------------------
10298 
10299 // Replicate for Superword
10300 
10301 instruct moveReg(iRegLdst dst, iRegIsrc src) %{
10302   predicate(false);
10303   effect(DEF dst, USE src);
10304 
10305   format %{ "MR      $dst, $src \t// replicate " %}
10306   // variable size, 0 or 4.
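  // (mr_if_needed emits no instruction when dst == src, hence 0 or 4 bytes.)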
10307   ins_encode %{
10308     __ mr_if_needed($dst$$Register, $src$$Register);
10309   %}
10310   ins_pipe(pipe_class_default);
10311 %}
10312 
10313 //----------Cast instructions (Java-level type cast)---------------------------
10314 
10315 // Cast Long to Pointer for unsafe natives.
10316 instruct castX2P(iRegPdst dst, iRegLsrc src) %{
10317   match(Set dst (CastX2P src));
10318 
10319   format %{ "MR      $dst, $src \t// Long->Ptr" %}
10320   // variable size, 0 or 4.
10321   ins_encode %{
10322     __ mr_if_needed($dst$$Register, $src$$Register);
10323   %}
10324  ins_pipe(pipe_class_default);
10325 %}
10326 
10327 // Cast Pointer to Long for unsafe natives.
10328 instruct castP2X(iRegLdst dst, iRegP_N2P src) %{
10329   match(Set dst (CastP2X src));
10330 
10331   format %{ "MR      $dst, $src \t// Ptr->Long" %}
10332   // variable size, 0 or 4.
10333   ins_encode %{
10334     __ mr_if_needed($dst$$Register, $src$$Register);
10335   %}
10336   ins_pipe(pipe_class_default);
10337 %}
10338 
10339 instruct castPP(iRegPdst dst) %{
10340   match(Set dst (CastPP dst));
10341   format %{ " -- \t// castPP of $dst" %}
10342   size(0);
10343   ins_encode( /*empty*/ );
10344   ins_pipe(pipe_class_default);
10345 %}
10346 
10347 instruct castII(iRegIdst dst) %{
10348   match(Set dst (CastII dst));
10349   format %{ " -- \t// castII of $dst" %}
10350   size(0);
10351   ins_encode( /*empty*/ );
10352   ins_pipe(pipe_class_default);
10353 %}
10354 
10355 instruct castLL(iRegLdst dst) %{
10356   match(Set dst (CastLL dst));
10357   format %{ " -- \t// castLL of $dst" %}
10358   size(0);
10359   ins_encode( /*empty*/ );
10360   ins_pipe(pipe_class_default);
10361 %}
10362 
10363 instruct castFF(regF dst) %{
10364   match(Set dst (CastFF dst));
10365   format %{ " -- \t// castFF of $dst" %}
10366   size(0);
10367   ins_encode( /*empty*/ );
10368   ins_pipe(pipe_class_default);
10369 %}
10370 
10371 instruct castDD(regD dst) %{
10372   match(Set dst (CastDD dst));
10373   format %{ " -- \t// castDD of $dst" %}
10374   size(0);
10375   ins_encode( /*empty*/ );
10376   ins_pipe(pipe_class_default);
10377 %}
10378 
10379 instruct castVV8(iRegLdst dst) %{
10380   match(Set dst (CastVV dst));
10381   format %{ " -- \t// castVV of $dst" %}
10382   size(0);
10383   ins_encode( /*empty*/ );
10384   ins_pipe(pipe_class_default);
10385 %}
10386 
10387 instruct castVV16(vecX dst) %{
10388   match(Set dst (CastVV dst));
10389   format %{ " -- \t// castVV of $dst" %}
10390   size(0);
10391   ins_encode( /*empty*/ );
10392   ins_pipe(pipe_class_default);
10393 %}
10394 
10395 instruct checkCastPP(iRegPdst dst) %{
10396   match(Set dst (CheckCastPP dst));
10397   format %{ " -- \t// checkcastPP of $dst" %}
10398   size(0);
10399   ins_encode( /*empty*/ );
10400   ins_pipe(pipe_class_default);
10401 %}
10402 
10403 //----------Convert instructions-----------------------------------------------
10404 
10405 // Convert to boolean.
10406 
10407 // int_to_bool(src) : { 1   if src != 0
10408 //                    { 0   else
10409 //
10410 // strategy:
10411 // 1) Count leading zeros of 32 bit-value src,
10412 //    this returns 32 (0b10.0000) iff src == 0 and <32 otherwise.
10413 // 2) Shift 5 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
10414 // 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
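//
// Worked example (illustrative): src = 0x40 -> cntlzw = 25, 25 >> 5 = 0,
// 0 ^ 1 = 1; src = 0 -> cntlzw = 32, 32 >> 5 = 1, 1 ^ 1 = 0.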
10415 
10416 // convI2Bool
10417 instruct convI2Bool_reg__cntlz_Ex(iRegIdst dst, iRegIsrc src) %{
10418   match(Set dst (Conv2B src));
10419   predicate(UseCountLeadingZerosInstructionsPPC64);
10420   ins_cost(DEFAULT_COST);
10421 
10422   expand %{
10423     immI shiftAmount %{ 0x5 %}
10424     uimmI16 mask %{ 0x1 %}
10425     iRegIdst tmp1;
10426     iRegIdst tmp2;
10427     countLeadingZerosI(tmp1, src);
10428     urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
10429     xorI_reg_uimm16(dst, tmp2, mask);
10430   %}
10431 %}
10432 
10433 instruct convI2Bool_reg__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx) %{
10434   match(Set dst (Conv2B src));
10435   effect(TEMP crx);
10436   predicate(!UseCountLeadingZerosInstructionsPPC64);
10437   ins_cost(DEFAULT_COST);
10438 
10439   format %{ "CMPWI   $crx, $src, #0 \t// convI2B"
10440             "LI      $dst, #0\n\t"
10441             "BEQ     $crx, done\n\t"
10442             "LI      $dst, #1\n"
10443             "done:" %}
10444   size(16);
10445   ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x0, 0x1) );
10446   ins_pipe(pipe_class_compare);
10447 %}
10448 
10449 // ConvI2B + XorI
10450 instruct xorI_convI2Bool_reg_immIvalue1__cntlz_Ex(iRegIdst dst, iRegIsrc src, immI_1 mask) %{
10451   match(Set dst (XorI (Conv2B src) mask));
10452   predicate(UseCountLeadingZerosInstructionsPPC64);
10453   ins_cost(DEFAULT_COST);
10454 
10455   expand %{
10456     immI shiftAmount %{ 0x5 %}
10457     iRegIdst tmp1;
10458     countLeadingZerosI(tmp1, src);
10459     urShiftI_reg_imm(dst, tmp1, shiftAmount);
10460   %}
10461 %}
10462 
10463 instruct xorI_convI2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegIsrc src, flagsReg crx, immI_1 mask) %{
10464   match(Set dst (XorI (Conv2B src) mask));
10465   effect(TEMP crx);
10466   predicate(!UseCountLeadingZerosInstructionsPPC64);
10467   ins_cost(DEFAULT_COST);
10468 
10469   format %{ "CMPWI   $crx, $src, #0 \t// Xor(convI2B($src), $mask)"
10470             "LI      $dst, #1\n\t"
10471             "BEQ     $crx, done\n\t"
10472             "LI      $dst, #0\n"
10473             "done:" %}
10474   size(16);
10475   ins_encode( enc_convI2B_regI__cmove(dst, src, crx, 0x1, 0x0) );
10476   ins_pipe(pipe_class_compare);
10477 %}
10478 
10479 // AndI 0b0..010..0 + ConvI2B
10480 instruct convI2Bool_andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src, immIpowerOf2 mask) %{
10481   match(Set dst (Conv2B (AndI src mask)));
10482   predicate(UseRotateAndMaskInstructionsPPC64);
10483   ins_cost(DEFAULT_COST);
10484 
10485   format %{ "RLWINM  $dst, $src, $mask \t// convI2B(AndI($src, $mask))" %}
10486   size(4);
10487   ins_encode %{
10488     __ rlwinm($dst$$Register, $src$$Register, 32 - log2i_exact((juint)($mask$$constant)), 31, 31);
10489   %}
10490   ins_pipe(pipe_class_default);
10491 %}
10492 
10493 // Convert pointer to boolean.
10494 //
10495 // ptr_to_bool(src) : { 1   if src != 0
10496 //                    { 0   else
10497 //
10498 // strategy:
10499 // 1) Count leading zeros of 64 bit-value src,
10500 //    this returns 64 (0b100.0000) iff src == 0 and <64 otherwise.
10501 // 2) Shift 6 bits to the right, result is 0b1 iff src == 0, 0b0 otherwise.
10502 // 3) Xori the result to get 0b1 if src != 0 and 0b0 if src == 0.
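//
// Worked example (illustrative): src = 0 -> cntlzd = 64, 64 >> 6 = 1,
// 1 ^ 1 = 0; any src != 0 -> cntlzd < 64, (cntlzd >> 6) = 0, 0 ^ 1 = 1.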
10503 
10504 // ConvP2B
10505 instruct convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src) %{
10506   match(Set dst (Conv2B src));
10507   predicate(UseCountLeadingZerosInstructionsPPC64);
10508   ins_cost(DEFAULT_COST);
10509 
10510   expand %{
10511     immI shiftAmount %{ 0x6 %}
10512     uimmI16 mask %{ 0x1 %}
10513     iRegIdst tmp1;
10514     iRegIdst tmp2;
10515     countLeadingZerosP(tmp1, src);
10516     urShiftI_reg_imm(tmp2, tmp1, shiftAmount);
10517     xorI_reg_uimm16(dst, tmp2, mask);
10518   %}
10519 %}
10520 
10521 instruct convP2Bool_reg__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx) %{
10522   match(Set dst (Conv2B src));
10523   effect(TEMP crx);
10524   predicate(!UseCountLeadingZerosInstructionsPPC64);
10525   ins_cost(DEFAULT_COST);
10526 
10527   format %{ "CMPDI   $crx, $src, #0 \t// convP2B"
10528             "LI      $dst, #0\n\t"
10529             "BEQ     $crx, done\n\t"
10530             "LI      $dst, #1\n"
10531             "done:" %}
10532   size(16);
10533   ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x0, 0x1) );
10534   ins_pipe(pipe_class_compare);
10535 %}
10536 
10537 // ConvP2B + XorI
10538 instruct xorI_convP2Bool_reg__cntlz_Ex(iRegIdst dst, iRegP_N2P src, immI_1 mask) %{
10539   match(Set dst (XorI (Conv2B src) mask));
10540   predicate(UseCountLeadingZerosInstructionsPPC64);
10541   ins_cost(DEFAULT_COST);
10542 
10543   expand %{
10544     immI shiftAmount %{ 0x6 %}
10545     iRegIdst tmp1;
10546     countLeadingZerosP(tmp1, src);
10547     urShiftI_reg_imm(dst, tmp1, shiftAmount);
10548   %}
10549 %}
10550 
10551 instruct xorI_convP2Bool_reg_immIvalue1__cmove(iRegIdst dst, iRegP_N2P src, flagsReg crx, immI_1 mask) %{
10552   match(Set dst (XorI (Conv2B src) mask));
10553   effect(TEMP crx);
10554   predicate(!UseCountLeadingZerosInstructionsPPC64);
10555   ins_cost(DEFAULT_COST);
10556 
10557   format %{ "CMPDI   $crx, $src, #0 \t// XorI(convP2B($src), $mask)"
10558             "LI      $dst, #1\n\t"
10559             "BEQ     $crx, done\n\t"
10560             "LI      $dst, #0\n"
10561             "done:" %}
10562   size(16);
10563   ins_encode( enc_convP2B_regP__cmove(dst, src, crx, 0x1, 0x0) );
10564   ins_pipe(pipe_class_compare);
10565 %}
10566 
10567 // if src1 < src2, return -1 else return 0
10568 instruct cmpLTMask_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
10569   match(Set dst (CmpLTMask src1 src2));
10570   ins_cost(DEFAULT_COST*4);
10571 
10572   expand %{
10573     iRegLdst src1s;
10574     iRegLdst src2s;
10575     iRegLdst diff;
10576     convI2L_reg(src1s, src1); // Ensure proper sign extension.
10577     convI2L_reg(src2s, src2); // Ensure proper sign extension.
10578     subL_reg_reg(diff, src1s, src2s);
10579     // Need to consider >=33 bit result, therefore we need signmaskL.
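    // Example: src1 = 0x80000000, src2 = 0x7FFFFFFF gives diff = -(2^32 - 1),
    // which only fits in 33 bits; the 64-bit sign bit then yields the -1 mask.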
10580     signmask64I_regL(dst, diff);
10581   %}
10582 %}
10583 
10584 instruct cmpLTMask_reg_immI0(iRegIdst dst, iRegIsrc src1, immI_0 src2) %{
10585   match(Set dst (CmpLTMask src1 src2)); // if src1 < src2, return -1 else return 0
10586   format %{ "SRAWI   $dst, $src1, $src2 \t// CmpLTMask" %}
10587   size(4);
10588   ins_encode %{
10589     __ srawi($dst$$Register, $src1$$Register, 0x1f);
10590   %}
10591   ins_pipe(pipe_class_default);
10592 %}
10593 
10594 //----------Arithmetic Conversion Instructions---------------------------------
10595 
10596 // Convert to Byte  -- nop
10597 // Convert to Short -- nop
10598 
10599 // Convert to Int
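// A byte->int conversion appears in the ideal graph as (src << 24) >> 24 and
// is matched below as a single EXTSB; short->int is analogous with shifts by 16.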
10600 
10601 instruct convB2I_reg(iRegIdst dst, iRegIsrc src, immI_24 amount) %{
10602   match(Set dst (RShiftI (LShiftI src amount) amount));
10603   format %{ "EXTSB   $dst, $src \t// byte->int" %}
10604   size(4);
10605   ins_encode %{
10606     __ extsb($dst$$Register, $src$$Register);
10607   %}
10608   ins_pipe(pipe_class_default);
10609 %}
10610 
10611 instruct extsh(iRegIdst dst, iRegIsrc src) %{
10612   effect(DEF dst, USE src);
10613 
10614   size(4);
10615   ins_encode %{
10616     __ extsh($dst$$Register, $src$$Register);
10617   %}
10618   ins_pipe(pipe_class_default);
10619 %}
10620 
10621 // LShiftI 16 + RShiftI 16 converts short to int.
10622 instruct convS2I_reg(iRegIdst dst, iRegIsrc src, immI_16 amount) %{
10623   match(Set dst (RShiftI (LShiftI src amount) amount));
10624   format %{ "EXTSH   $dst, $src \t// short->int" %}
10625   size(4);
10626   ins_encode %{
10627     __ extsh($dst$$Register, $src$$Register);
10628   %}
10629   ins_pipe(pipe_class_default);
10630 %}
10631 
10632 // ConvL2I + ConvI2L: Sign extend int in long register.
10633 instruct sxtI_L2L_reg(iRegLdst dst, iRegLsrc src) %{
10634   match(Set dst (ConvI2L (ConvL2I src)));
10635 
10636   format %{ "EXTSW   $dst, $src \t// long->long" %}
10637   size(4);
10638   ins_encode %{
10639     __ extsw($dst$$Register, $src$$Register);
10640   %}
10641   ins_pipe(pipe_class_default);
10642 %}
10643 
10644 instruct convL2I_reg(iRegIdst dst, iRegLsrc src) %{
10645   match(Set dst (ConvL2I src));
10646   format %{ "MR      $dst, $src \t// long->int" %}
10647   // variable size, 0 or 4
10648   ins_encode %{
10649     __ mr_if_needed($dst$$Register, $src$$Register);
10650   %}
10651   ins_pipe(pipe_class_default);
10652 %}
10653 
10654 instruct convD2IRaw_regD(regD dst, regD src) %{
10655   // no match-rule, false predicate
10656   effect(DEF dst, USE src);
10657   predicate(false);
10658 
10659   format %{ "FCTIWZ $dst, $src \t// convD2I, $src != NaN" %}
10660   size(4);
10661   ins_encode %{
10662     __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
10663   %}
10664   ins_pipe(pipe_class_default);
10665 %}
10666 
10667 instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{
10668   // no match-rule, false predicate
10669   effect(DEF dst, USE crx, USE src);
10670   predicate(false);
10671 
10672   ins_variable_size_depending_on_alignment(true);
10673 
10674   format %{ "cmovI   $crx, $dst, $src" %}
10675   // Worst case is branch + move + stop, no stop without scheduler.
10676   size(8);
10677   ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
10678   ins_pipe(pipe_class_default);
10679 %}
10680 
10681 instruct cmovI_bso_reg(iRegIdst dst, flagsRegSrc crx, regD src) %{
10682   // no match-rule, false predicate
10683   effect(DEF dst, USE crx, USE src);
10684   predicate(false);
10685 
10686   ins_variable_size_depending_on_alignment(true);
10687 
10688   format %{ "cmovI   $crx, $dst, $src" %}
10689   // Worst case is branch + move + stop, no stop without scheduler.
10690   size(8);
10691   ins_encode( enc_cmove_bso_reg(dst, crx, src) );
10692   ins_pipe(pipe_class_default);
10693 %}
10694 
10695 instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{
10696   // no match-rule, false predicate
10697   effect(DEF dst, USE crx, USE mem);
10698   predicate(false);
10699 
10700   format %{ "CmovI   $dst, $crx, $mem \t// postalloc expanded" %}
10701   postalloc_expand %{
10702     //
10703     // replaces
10704     //
10705     //   region  dst  crx  mem
10706     //    \       |    |   /
10707     //     dst=cmovI_bso_stackSlotL_conLvalue0
10708     //
10709     // with
10710     //
10711     //   region  dst
10712     //    \       /
10713     //     dst=loadConI16(0)
10714     //      |
10715     //      ^  region  dst  crx  mem
10716     //      |   \       |    |    /
10717     //      dst=cmovI_bso_stackSlotL
10718     //
10719 
10720     // Create new nodes.
10721     MachNode *m1 = new loadConI16Node();
10722     MachNode *m2 = new cmovI_bso_stackSlotLNode();
10723 
10724     // inputs for new nodes
10725     m1->add_req(n_region);
10726     m2->add_req(n_region, n_crx, n_mem);
10727 
10728     // precedences for new nodes
10729     m2->add_prec(m1);
10730 
10731     // operands for new nodes
10732     m1->_opnds[0] = op_dst;
10733     m1->_opnds[1] = new immI16Oper(0);
10734 
10735     m2->_opnds[0] = op_dst;
10736     m2->_opnds[1] = op_crx;
10737     m2->_opnds[2] = op_mem;
10738 
10739     // registers for new nodes
10740     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10741     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10742 
10743     // Insert new nodes.
10744     nodes->push(m1);
10745     nodes->push(m2);
10746   %}
10747 %}
10748 
10749 instruct cmovI_bso_reg_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, regD src) %{
10750   // no match-rule, false predicate
10751   effect(DEF dst, USE crx, USE src);
10752   predicate(false);
10753 
10754   format %{ "CmovI   $dst, $crx, $src \t// postalloc expanded" %}
10755   postalloc_expand %{
10756     //
10757     // replaces
10758     //
10759     //   region  dst  crx  src
10760     //    \       |    |   /
10761     //     dst=cmovI_bso_reg_conLvalue0
10762     //
10763     // with
10764     //
10765     //   region  dst
10766     //    \       /
10767     //     dst=loadConI16(0)
10768     //      |
10769     //      ^  region  dst  crx  src
10770     //      |   \       |    |    /
10771     //      dst=cmovI_bso_reg
10772     //
10773 
10774     // Create new nodes.
10775     MachNode *m1 = new loadConI16Node();
10776     MachNode *m2 = new cmovI_bso_regNode();
10777 
10778     // inputs for new nodes
10779     m1->add_req(n_region);
10780     m2->add_req(n_region, n_crx, n_src);
10781 
10782     // precedences for new nodes
10783     m2->add_prec(m1);
10784 
10785     // operands for new nodes
10786     m1->_opnds[0] = op_dst;
10787     m1->_opnds[1] = new immI16Oper(0);
10788 
10789     m2->_opnds[0] = op_dst;
10790     m2->_opnds[1] = op_crx;
10791     m2->_opnds[2] = op_src;
10792 
10793     // registers for new nodes
10794     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10795     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
10796 
10797     // Insert new nodes.
10798     nodes->push(m1);
10799     nodes->push(m2);
10800   %}
10801 %}
10802 
10803 // Double to Int conversion, NaN is mapped to 0.
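// The raw FCTIWZ/FCTIDZ result for a NaN input is not the 0 that Java requires,
// so the conv[FD]2[IL] variants convert speculatively and only move the result
// into place when the unordered compare shows the input was not NaN; otherwise
// the preloaded 0 is kept.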
10804 instruct convD2I_reg_ExEx(iRegIdst dst, regD src) %{
10805   match(Set dst (ConvD2I src));
10806   predicate(!VM_Version::has_mtfprd());
10807   ins_cost(DEFAULT_COST);
10808 
10809   expand %{
10810     regD tmpD;
10811     stackSlotL tmpS;
10812     flagsReg crx;
10813     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store double to stack (speculated).
10816     cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
10817   %}
10818 %}
10819 
10820 // Double to Int conversion, NaN is mapped to 0. Special version for Power8.
10821 instruct convD2I_reg_mffprd_ExEx(iRegIdst dst, regD src) %{
10822   match(Set dst (ConvD2I src));
10823   predicate(VM_Version::has_mtfprd());
10824   ins_cost(DEFAULT_COST);
10825 
10826   expand %{
10827     regD tmpD;
10828     flagsReg crx;
10829     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2IRaw_regD(tmpD, src);                         // Convert double to int (speculated).
10831     cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
10832   %}
10833 %}
10834 
10835 instruct convF2IRaw_regF(regF dst, regF src) %{
10836   // no match-rule, false predicate
10837   effect(DEF dst, USE src);
10838   predicate(false);
10839 
10840   format %{ "FCTIWZ $dst, $src \t// convF2I, $src != NaN" %}
10841   size(4);
10842   ins_encode %{
10843     __ fctiwz($dst$$FloatRegister, $src$$FloatRegister);
10844   %}
10845   ins_pipe(pipe_class_default);
10846 %}
10847 
10848 // Float to Int conversion, NaN is mapped to 0.
10849 instruct convF2I_regF_ExEx(iRegIdst dst, regF src) %{
10850   match(Set dst (ConvF2I src));
10851   predicate(!VM_Version::has_mtfprd());
10852   ins_cost(DEFAULT_COST);
10853 
10854   expand %{
10855     regF tmpF;
10856     stackSlotL tmpS;
10857     flagsReg crx;
10858     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
10859     convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
10860     moveF2L_reg_stack(tmpS, tmpF);                      // Store float to stack (speculated).
10861     cmovI_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
10862   %}
10863 %}
10864 
10865 // Float to Int conversion, NaN is mapped to 0. Special version for Power8.
10866 instruct convF2I_regF_mffprd_ExEx(iRegIdst dst, regF src) %{
10867   match(Set dst (ConvF2I src));
10868   predicate(VM_Version::has_mtfprd());
10869   ins_cost(DEFAULT_COST);
10870 
10871   expand %{
10872     regF tmpF;
10873     flagsReg crx;
10874     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
10875     convF2IRaw_regF(tmpF, src);                         // Convert float to int (speculated).
10876     cmovI_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
10877   %}
10878 %}
10879 
10880 // Convert to Long
10881 
10882 instruct convI2L_reg(iRegLdst dst, iRegIsrc src) %{
10883   match(Set dst (ConvI2L src));
10884   format %{ "EXTSW   $dst, $src \t// int->long" %}
10885   size(4);
10886   ins_encode %{
10887     __ extsw($dst$$Register, $src$$Register);
10888   %}
10889   ins_pipe(pipe_class_default);
10890 %}
10891 
10892 // Zero-extend: convert unsigned int to long (convUI2L).
10893 instruct zeroExtendL_regI(iRegLdst dst, iRegIsrc src, immL_32bits mask) %{
10894   match(Set dst (AndL (ConvI2L src) mask));
10895   ins_cost(DEFAULT_COST);
10896 
10897   format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
10898   size(4);
10899   ins_encode %{
10900     __ clrldi($dst$$Register, $src$$Register, 32);
10901   %}
10902   ins_pipe(pipe_class_default);
10903 %}
10904 
10905 // Zero-extend: convert unsigned int to long in long register.
10906 instruct zeroExtendL_regL(iRegLdst dst, iRegLsrc src, immL_32bits mask) %{
10907   match(Set dst (AndL src mask));
10908   ins_cost(DEFAULT_COST);
10909 
10910   format %{ "CLRLDI  $dst, $src, #32 \t// zero-extend int to long" %}
10911   size(4);
10912   ins_encode %{
10913     __ clrldi($dst$$Register, $src$$Register, 32);
10914   %}
10915   ins_pipe(pipe_class_default);
10916 %}
10917 
10918 instruct convF2LRaw_regF(regF dst, regF src) %{
10919   // no match-rule, false predicate
10920   effect(DEF dst, USE src);
10921   predicate(false);
10922 
10923   format %{ "FCTIDZ $dst, $src \t// convF2L, $src != NaN" %}
10924   size(4);
10925   ins_encode %{
10926     __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
10927   %}
10928   ins_pipe(pipe_class_default);
10929 %}
10930 
10931 instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{
10932   // no match-rule, false predicate
10933   effect(DEF dst, USE crx, USE src);
10934   predicate(false);
10935 
10936   ins_variable_size_depending_on_alignment(true);
10937 
10938   format %{ "cmovL   $crx, $dst, $src" %}
10939   // Worst case is branch + move + stop, no stop without scheduler.
10940   size(8);
10941   ins_encode( enc_cmove_bso_stackSlotL(dst, crx, src) );
10942   ins_pipe(pipe_class_default);
10943 %}
10944 
10945 instruct cmovL_bso_reg(iRegLdst dst, flagsRegSrc crx, regD src) %{
10946   // no match-rule, false predicate
10947   effect(DEF dst, USE crx, USE src);
10948   predicate(false);
10949 
10950   ins_variable_size_depending_on_alignment(true);
10951 
10952   format %{ "cmovL   $crx, $dst, $src" %}
10953   // Worst case is branch + move + stop, no stop without scheduler.
10954   size(8);
10955   ins_encode( enc_cmove_bso_reg(dst, crx, src) );
10956   ins_pipe(pipe_class_default);
10957 %}
10958 
10959 instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{
10960   // no match-rule, false predicate
10961   effect(DEF dst, USE crx, USE mem);
10962   predicate(false);
10963 
10964   format %{ "CmovL   $dst, $crx, $mem \t// postalloc expanded" %}
10965   postalloc_expand %{
10966     //
10967     // replaces
10968     //
10969     //   region  dst  crx  mem
10970     //    \       |    |   /
10971     //     dst=cmovL_bso_stackSlotL_conLvalue0
10972     //
10973     // with
10974     //
10975     //   region  dst
10976     //    \       /
10977     //     dst=loadConL16(0)
10978     //      |
10979     //      ^  region  dst  crx  mem
10980     //      |   \       |    |    /
10981     //      dst=cmovL_bso_stackSlotL
10982     //
10983 
10984     // Create new nodes.
10985     MachNode *m1 = new loadConL16Node();
10986     MachNode *m2 = new cmovL_bso_stackSlotLNode();
10987 
10988     // inputs for new nodes
10989     m1->add_req(n_region);
10990     m2->add_req(n_region, n_crx, n_mem);
10991     m2->add_prec(m1);
10992 
10993     // operands for new nodes
10994     m1->_opnds[0] = op_dst;
10995     m1->_opnds[1] = new immL16Oper(0);
10996     m2->_opnds[0] = op_dst;
10997     m2->_opnds[1] = op_crx;
10998     m2->_opnds[2] = op_mem;
10999 
11000     // registers for new nodes
11001     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11002     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11003 
11004     // Insert new nodes.
11005     nodes->push(m1);
11006     nodes->push(m2);
11007   %}
11008 %}
11009 
11010 instruct cmovL_bso_reg_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, regD src) %{
11011   // no match-rule, false predicate
11012   effect(DEF dst, USE crx, USE src);
11013   predicate(false);
11014 
11015   format %{ "CmovL   $dst, $crx, $src \t// postalloc expanded" %}
11016   postalloc_expand %{
11017     //
11018     // replaces
11019     //
11020     //   region  dst  crx  src
11021     //    \       |    |   /
11022     //     dst=cmovL_bso_reg_conLvalue0
11023     //
11024     // with
11025     //
11026     //   region  dst
11027     //    \       /
11028     //     dst=loadConL16(0)
11029     //      |
11030     //      ^  region  dst  crx  src
11031     //      |   \       |    |    /
11032     //      dst=cmovL_bso_reg
11033     //
11034 
11035     // Create new nodes.
11036     MachNode *m1 = new loadConL16Node();
11037     MachNode *m2 = new cmovL_bso_regNode();
11038 
11039     // inputs for new nodes
11040     m1->add_req(n_region);
11041     m2->add_req(n_region, n_crx, n_src);
11042     m2->add_prec(m1);
11043 
11044     // operands for new nodes
11045     m1->_opnds[0] = op_dst;
11046     m1->_opnds[1] = new immL16Oper(0);
11047     m2->_opnds[0] = op_dst;
11048     m2->_opnds[1] = op_crx;
11049     m2->_opnds[2] = op_src;
11050 
11051     // registers for new nodes
11052     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11053     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // dst
11054 
11055     // Insert new nodes.
11056     nodes->push(m1);
11057     nodes->push(m2);
11058   %}
11059 %}
11060 
11061 // Float to Long conversion, NaN is mapped to 0.
11062 instruct convF2L_reg_ExEx(iRegLdst dst, regF src) %{
11063   match(Set dst (ConvF2L src));
11064   predicate(!VM_Version::has_mtfprd());
11065   ins_cost(DEFAULT_COST);
11066 
11067   expand %{
11068     regF tmpF;
11069     stackSlotL tmpS;
11070     flagsReg crx;
11071     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
11072     convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
11073     moveF2L_reg_stack(tmpS, tmpF);                      // Store float to stack (speculated).
11074     cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
11075   %}
11076 %}
11077 
11078 // Float to Long conversion, NaN is mapped to 0. Special version for Power8.
11079 instruct convF2L_reg_mffprd_ExEx(iRegLdst dst, regF src) %{
11080   match(Set dst (ConvF2L src));
11081   predicate(VM_Version::has_mtfprd());
11082   ins_cost(DEFAULT_COST);
11083 
11084   expand %{
11085     regF tmpF;
11086     flagsReg crx;
11087     cmpFUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
11088     convF2LRaw_regF(tmpF, src);                         // Convert float to long (speculated).
11089     cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpF);        // Cmove based on NaN check.
11090   %}
11091 %}
11092 
11093 instruct convD2LRaw_regD(regD dst, regD src) %{
11094   // no match-rule, false predicate
11095   effect(DEF dst, USE src);
11096   predicate(false);
11097 
11098   format %{ "FCTIDZ $dst, $src \t// convD2L $src != NaN" %}
11099   size(4);
11100   ins_encode %{
11101     __ fctidz($dst$$FloatRegister, $src$$FloatRegister);
11102   %}
11103   ins_pipe(pipe_class_default);
11104 %}
11105 
11106 // Double to Long conversion, NaN is mapped to 0.
11107 instruct convD2L_reg_ExEx(iRegLdst dst, regD src) %{
11108   match(Set dst (ConvD2L src));
11109   predicate(!VM_Version::has_mtfprd());
11110   ins_cost(DEFAULT_COST);
11111 
11112   expand %{
11113     regD tmpD;
11114     stackSlotL tmpS;
11115     flagsReg crx;
11116     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
    moveD2L_reg_stack(tmpS, tmpD);                      // Store double to stack (speculated).
11119     cmovL_bso_stackSlotL_conLvalue0_Ex(dst, crx, tmpS); // Cmove based on NaN check.
11120   %}
11121 %}
11122 
11123 // Double to Long conversion, NaN is mapped to 0. Special version for Power8.
11124 instruct convD2L_reg_mffprd_ExEx(iRegLdst dst, regD src) %{
11125   match(Set dst (ConvD2L src));
11126   predicate(VM_Version::has_mtfprd());
11127   ins_cost(DEFAULT_COST);
11128 
11129   expand %{
11130     regD tmpD;
11131     flagsReg crx;
11132     cmpDUnordered_reg_reg(crx, src, src);               // Check whether src is NaN.
    convD2LRaw_regD(tmpD, src);                         // Convert double to long (speculated).
11134     cmovL_bso_reg_conLvalue0_Ex(dst, crx, tmpD);        // Cmove based on NaN check.
11135   %}
11136 %}
11137 
11138 // Convert to Float
11139 
11140 // Placed here as needed in expand.
11141 instruct convL2DRaw_regD(regD dst, regD src) %{
11142   // no match-rule, false predicate
11143   effect(DEF dst, USE src);
11144   predicate(false);
11145 
11146   format %{ "FCFID $dst, $src \t// convL2D" %}
11147   size(4);
11148   ins_encode %{
11149     __ fcfid($dst$$FloatRegister, $src$$FloatRegister);
11150   %}
11151   ins_pipe(pipe_class_default);
11152 %}
11153 
11154 // Placed here as needed in expand.
11155 instruct convD2F_reg(regF dst, regD src) %{
11156   match(Set dst (ConvD2F src));
11157   format %{ "FRSP    $dst, $src \t// convD2F" %}
11158   size(4);
11159   ins_encode %{
11160     __ frsp($dst$$FloatRegister, $src$$FloatRegister);
11161   %}
11162   ins_pipe(pipe_class_default);
11163 %}
11164 
11165 // Integer to Float conversion.
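// Three variants follow: without fcfids the long is spilled, reloaded into an
// FPR, converted with FCFID and rounded with FRSP; with fcfids (Power7) the
// FRSP step is not needed; with mtfprd (Power8) the spill/reload disappears too.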
11166 instruct convI2F_ireg_Ex(regF dst, iRegIsrc src) %{
11167   match(Set dst (ConvI2F src));
11168   predicate(!VM_Version::has_fcfids());
11169   ins_cost(DEFAULT_COST);
11170 
11171   expand %{
11172     iRegLdst tmpL;
11173     stackSlotL tmpS;
11174     regD tmpD;
11175     regD tmpD2;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11177     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11178     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11179     convL2DRaw_regD(tmpD2, tmpD);        // Convert to double.
11180     convD2F_reg(dst, tmpD2);             // Convert double to float.
11181   %}
11182 %}
11183 
11184 instruct convL2FRaw_regF(regF dst, regD src) %{
11185   // no match-rule, false predicate
11186   effect(DEF dst, USE src);
11187   predicate(false);
11188 
11189   format %{ "FCFIDS $dst, $src \t// convL2F" %}
11190   size(4);
11191   ins_encode %{
11192     __ fcfids($dst$$FloatRegister, $src$$FloatRegister);
11193   %}
11194   ins_pipe(pipe_class_default);
11195 %}
11196 
11197 // Integer to Float conversion. Special version for Power7.
11198 instruct convI2F_ireg_fcfids_Ex(regF dst, iRegIsrc src) %{
11199   match(Set dst (ConvI2F src));
11200   predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
11201   ins_cost(DEFAULT_COST);
11202 
11203   expand %{
11204     iRegLdst tmpL;
11205     stackSlotL tmpS;
11206     regD tmpD;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11208     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11209     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11210     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11211   %}
11212 %}
11213 
11214 // Integer to Float conversion. Special version for Power8.
11215 instruct convI2F_ireg_mtfprd_Ex(regF dst, iRegIsrc src) %{
11216   match(Set dst (ConvI2F src));
11217   predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
11218   ins_cost(DEFAULT_COST);
11219 
11220   expand %{
11221     regD tmpD;
11222     moveI2D_reg(tmpD, src);
11223     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11224   %}
11225 %}
11226 
11227 // L2F to avoid runtime call.
11228 instruct convL2F_ireg_fcfids_Ex(regF dst, iRegLsrc src) %{
11229   match(Set dst (ConvL2F src));
11230   predicate(VM_Version::has_fcfids() && !VM_Version::has_mtfprd());
11231   ins_cost(DEFAULT_COST);
11232 
11233   expand %{
11234     stackSlotL tmpS;
11235     regD tmpD;
11236     regL_to_stkL(tmpS, src);             // Store long to stack.
11237     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11238     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11239   %}
11240 %}
11241 
11242 // L2F to avoid runtime call.  Special version for Power8.
11243 instruct convL2F_ireg_mtfprd_Ex(regF dst, iRegLsrc src) %{
11244   match(Set dst (ConvL2F src));
11245   predicate(VM_Version::has_fcfids() && VM_Version::has_mtfprd());
11246   ins_cost(DEFAULT_COST);
11247 
11248   expand %{
11249     regD tmpD;
11250     moveL2D_reg(tmpD, src);
11251     convL2FRaw_regF(dst, tmpD);          // Convert to float.
11252   %}
11253 %}
11254 
11255 // Moved up as used in expand.
11256 //instruct convD2F_reg(regF dst, regD src) %{%}
11257 
11258 // Convert to Double
11259 
11260 // Integer to Double conversion.
11261 instruct convI2D_reg_Ex(regD dst, iRegIsrc src) %{
11262   match(Set dst (ConvI2D src));
11263   predicate(!VM_Version::has_mtfprd());
11264   ins_cost(DEFAULT_COST);
11265 
11266   expand %{
11267     iRegLdst tmpL;
11268     stackSlotL tmpS;
11269     regD tmpD;
    convI2L_reg(tmpL, src);              // Sign-extend int to long.
11271     regL_to_stkL(tmpS, tmpL);            // Store long to stack.
11272     moveL2D_stack_reg(tmpD, tmpS);       // Load long into double register.
11273     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11274   %}
11275 %}
11276 
11277 // Integer to Double conversion. Special version for Power8.
11278 instruct convI2D_reg_mtfprd_Ex(regD dst, iRegIsrc src) %{
11279   match(Set dst (ConvI2D src));
11280   predicate(VM_Version::has_mtfprd());
11281   ins_cost(DEFAULT_COST);
11282 
11283   expand %{
11284     regD tmpD;
11285     moveI2D_reg(tmpD, src);
11286     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11287   %}
11288 %}
11289 
11290 // Long to Double conversion
11291 instruct convL2D_reg_Ex(regD dst, stackSlotL src) %{
11292   match(Set dst (ConvL2D src));
11293   ins_cost(DEFAULT_COST + MEMORY_REF_COST);
11294 
11295   expand %{
11296     regD tmpD;
11297     moveL2D_stack_reg(tmpD, src);
11298     convL2DRaw_regD(dst, tmpD);
11299   %}
11300 %}
11301 
11302 // Long to Double conversion. Special version for Power8.
11303 instruct convL2D_reg_mtfprd_Ex(regD dst, iRegLsrc src) %{
11304   match(Set dst (ConvL2D src));
11305   predicate(VM_Version::has_mtfprd());
11306   ins_cost(DEFAULT_COST);
11307 
11308   expand %{
11309     regD tmpD;
11310     moveL2D_reg(tmpD, src);
11311     convL2DRaw_regD(dst, tmpD);          // Convert to double.
11312   %}
11313 %}
11314 
11315 instruct convF2D_reg(regD dst, regF src) %{
11316   match(Set dst (ConvF2D src));
11317   format %{ "FMR     $dst, $src \t// float->double" %}
11318   // variable size, 0 or 4
11319   ins_encode %{
11320     __ fmr_if_needed($dst$$FloatRegister, $src$$FloatRegister);
11321   %}
11322   ins_pipe(pipe_class_default);
11323 %}
11324 
11325 //----------Control Flow Instructions------------------------------------------
11326 // Compare Instructions
11327 
11328 // Compare Integers
11329 instruct cmpI_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
11330   match(Set crx (CmpI src1 src2));
11331   size(4);
11332   format %{ "CMPW    $crx, $src1, $src2" %}
11333   ins_encode %{
11334     __ cmpw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11335   %}
11336   ins_pipe(pipe_class_compare);
11337 %}
11338 
11339 instruct cmpI_reg_imm16(flagsReg crx, iRegIsrc src1, immI16 src2) %{
11340   match(Set crx (CmpI src1 src2));
11341   format %{ "CMPWI   $crx, $src1, $src2" %}
11342   size(4);
11343   ins_encode %{
11344     __ cmpwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11345   %}
11346   ins_pipe(pipe_class_compare);
11347 %}
11348 
11349 // (src1 & src2) == 0?
11350 instruct testI_reg_imm(flagsRegCR0 cr0, iRegIsrc src1, uimmI16 src2, immI_0 zero) %{
11351   match(Set cr0 (CmpI (AndI src1 src2) zero));
11352   // r0 is killed
11353   format %{ "ANDI    R0, $src1, $src2 \t// BTST int" %}
11354   size(4);
11355   ins_encode %{
11356     __ andi_(R0, $src1$$Register, $src2$$constant);
11357   %}
11358   ins_pipe(pipe_class_compare);
11359 %}
11360 
11361 instruct cmpL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
11362   match(Set crx (CmpL src1 src2));
11363   format %{ "CMPD    $crx, $src1, $src2" %}
11364   size(4);
11365   ins_encode %{
11366     __ cmpd($crx$$CondRegister, $src1$$Register, $src2$$Register);
11367   %}
11368   ins_pipe(pipe_class_compare);
11369 %}
11370 
11371 instruct cmpL_reg_imm16(flagsReg crx, iRegLsrc src1, immL16 src2) %{
11372   match(Set crx (CmpL src1 src2));
11373   format %{ "CMPDI   $crx, $src1, $src2" %}
11374   size(4);
11375   ins_encode %{
11376     __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11377   %}
11378   ins_pipe(pipe_class_compare);
11379 %}
11380 
11381 // Added CmpUL for LoopPredicate.
11382 instruct cmpUL_reg_reg(flagsReg crx, iRegLsrc src1, iRegLsrc src2) %{
11383   match(Set crx (CmpUL src1 src2));
11384   format %{ "CMPLD   $crx, $src1, $src2" %}
11385   size(4);
11386   ins_encode %{
11387     __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
11388   %}
11389   ins_pipe(pipe_class_compare);
11390 %}
11391 
11392 instruct cmpUL_reg_imm16(flagsReg crx, iRegLsrc src1, uimmL16 src2) %{
11393   match(Set crx (CmpUL src1 src2));
11394   format %{ "CMPLDI  $crx, $src1, $src2" %}
11395   size(4);
11396   ins_encode %{
11397     __ cmpldi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11398   %}
11399   ins_pipe(pipe_class_compare);
11400 %}
11401 
11402 instruct testL_reg_reg(flagsRegCR0 cr0, iRegLsrc src1, iRegLsrc src2, immL_0 zero) %{
11403   match(Set cr0 (CmpL (AndL src1 src2) zero));
11404   // r0 is killed
11405   format %{ "AND     R0, $src1, $src2 \t// BTST long" %}
11406   size(4);
11407   ins_encode %{
11408     __ and_(R0, $src1$$Register, $src2$$Register);
11409   %}
11410   ins_pipe(pipe_class_compare);
11411 %}
11412 
11413 instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero) %{
11414   match(Set cr0 (CmpL (AndL src1 src2) zero));
11415   // r0 is killed
11416   format %{ "ANDI    R0, $src1, $src2 \t// BTST long" %}
11417   size(4);
11418   ins_encode %{
11419     __ andi_(R0, $src1$$Register, $src2$$constant);
11420   %}
11421   ins_pipe(pipe_class_compare);
11422 %}
11423 
11424 // Manifest a CmpL3 result in an integer register.
11425 instruct cmpL3_reg_reg(iRegIdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
11426   match(Set dst (CmpL3 src1 src2));
11427   effect(KILL cr0);
11428   ins_cost(DEFAULT_COST * 5);
11429   size(VM_Version::has_brw() ? 16 : 20);
11430 
11431   format %{ "cmpL3_reg_reg $dst, $src1, $src2" %}
11432 
11433   ins_encode %{
11434     __ cmpd(CCR0, $src1$$Register, $src2$$Register);
11435     __ set_cmp3($dst$$Register);
11436   %}
11437   ins_pipe(pipe_class_default);
11438 %}
11439 
11440 // Implicit range checks.
11441 // A range check in the ideal world has one of the following shapes:
11442 //  - (If le (CmpU length index)), (IfTrue  throw exception)
11443 //  - (If lt (CmpU index length)), (IfFalse throw exception)
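//  E.g. an array access 'a[i]' shows up as (If lt (CmpU i (LoadRange a))); with
//  TrapBasedRangeChecks the check is emitted as a single tw/twi instruction
//  whose trap is turned into an uncommon trap by the signal handler.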
11444 //
11445 // Match range check 'If le (CmpU length index)'.
11446 instruct rangeCheck_iReg_uimm15(cmpOp cmp, iRegIsrc src_length, uimmI15 index, label labl) %{
11447   match(If cmp (CmpU src_length index));
11448   effect(USE labl);
11449   predicate(TrapBasedRangeChecks &&
11450             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
11451             PROB_UNLIKELY(_leaf->as_If()->_prob) >= PROB_ALWAYS &&
11452             (Matcher::branches_to_uncommon_trap(_leaf)));
11453 
11454   ins_is_TrapBasedCheckNode(true);
11455 
11456   format %{ "TWI     $index $cmp $src_length \t// RangeCheck => trap $labl" %}
11457   size(4);
11458   ins_encode %{
11459     if ($cmp$$cmpcode == 0x1 /* less_equal */) {
11460       __ trap_range_check_le($src_length$$Register, $index$$constant);
11461     } else {
11462       // Both successors are uncommon traps, probability is 0.
11463       // Node got flipped during fixup flow.
11464       assert($cmp$$cmpcode == 0x9, "must be greater");
11465       __ trap_range_check_g($src_length$$Register, $index$$constant);
11466     }
11467   %}
11468   ins_pipe(pipe_class_trap);
11469 %}
11470 
11471 // Match range check 'If lt (CmpU index length)'.
11472 instruct rangeCheck_iReg_iReg(cmpOp cmp, iRegIsrc src_index, iRegIsrc src_length, label labl) %{
11473   match(If cmp (CmpU src_index src_length));
11474   effect(USE labl);
11475   predicate(TrapBasedRangeChecks &&
11476             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
11477             _leaf->as_If()->_prob >= PROB_ALWAYS &&
11478             (Matcher::branches_to_uncommon_trap(_leaf)));
11479 
11480   ins_is_TrapBasedCheckNode(true);
11481 
11482   format %{ "TW      $src_index $cmp $src_length \t// RangeCheck => trap $labl" %}
11483   size(4);
11484   ins_encode %{
11485     if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
11486       __ trap_range_check_ge($src_index$$Register, $src_length$$Register);
11487     } else {
11488       // Both successors are uncommon traps, probability is 0.
11489       // Node got flipped during fixup flow.
11490       assert($cmp$$cmpcode == 0x8, "must be less");
11491       __ trap_range_check_l($src_index$$Register, $src_length$$Register);
11492     }
11493   %}
11494   ins_pipe(pipe_class_trap);
11495 %}
11496 
11497 // Match range check 'If lt (CmpU index length)'.
11498 instruct rangeCheck_uimm15_iReg(cmpOp cmp, iRegIsrc src_index, uimmI15 length, label labl) %{
11499   match(If cmp (CmpU src_index length));
11500   effect(USE labl);
11501   predicate(TrapBasedRangeChecks &&
11502             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
11503             _leaf->as_If()->_prob >= PROB_ALWAYS &&
11504             (Matcher::branches_to_uncommon_trap(_leaf)));
11505 
11506   ins_is_TrapBasedCheckNode(true);
11507 
11508   format %{ "TWI     $src_index $cmp $length \t// RangeCheck => trap $labl" %}
11509   size(4);
11510   ins_encode %{
11511     if ($cmp$$cmpcode == 0x0 /* greater_equal */) {
11512       __ trap_range_check_ge($src_index$$Register, $length$$constant);
11513     } else {
11514       // Both successors are uncommon traps, probability is 0.
11515       // Node got flipped during fixup flow.
11516       assert($cmp$$cmpcode == 0x8, "must be less");
11517       __ trap_range_check_l($src_index$$Register, $length$$constant);
11518     }
11519   %}
11520   ins_pipe(pipe_class_trap);
11521 %}
11522 
11523 instruct compU_reg_reg(flagsReg crx, iRegIsrc src1, iRegIsrc src2) %{
11524   match(Set crx (CmpU src1 src2));
11525   format %{ "CMPLW   $crx, $src1, $src2 \t// unsigned" %}
11526   size(4);
11527   ins_encode %{
11528     __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11529   %}
11530   ins_pipe(pipe_class_compare);
11531 %}
11532 
11533 instruct compU_reg_uimm16(flagsReg crx, iRegIsrc src1, uimmI16 src2) %{
11534   match(Set crx (CmpU src1 src2));
11535   size(4);
11536   format %{ "CMPLWI  $crx, $src1, $src2" %}
11537   ins_encode %{
11538     __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11539   %}
11540   ins_pipe(pipe_class_compare);
11541 %}
11542 
11543 // Implicit zero checks (more implicit null checks).
11544 // No constant pool entries required.
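// The value is compared against zero with a trapping tdi, so no null/zero
// constant has to be materialized; the trap fires only when the value is 0
// and is then handled like the range-check traps above.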
11545 instruct zeroCheckN_iReg_imm0(cmpOp cmp, iRegNsrc value, immN_0 zero, label labl) %{
11546   match(If cmp (CmpN value zero));
11547   effect(USE labl);
11548   predicate(TrapBasedNullChecks &&
11549             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
11550             _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
11551             Matcher::branches_to_uncommon_trap(_leaf));
11552   ins_cost(1);
11553 
11554   ins_is_TrapBasedCheckNode(true);
11555 
11556   format %{ "TDI     $value $cmp $zero \t// ZeroCheckN => trap $labl" %}
11557   size(4);
11558   ins_encode %{
11559     if ($cmp$$cmpcode == 0xA) {
11560       __ trap_null_check($value$$Register);
11561     } else {
11562       // Both successors are uncommon traps, probability is 0.
11563       // Node got flipped during fixup flow.
11564       assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
11565       __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
11566     }
11567   %}
11568   ins_pipe(pipe_class_trap);
11569 %}
11570 
11571 // Compare narrow oops.
11572 instruct cmpN_reg_reg(flagsReg crx, iRegNsrc src1, iRegNsrc src2) %{
11573   match(Set crx (CmpN src1 src2));
11574 
11575   size(4);
11576   ins_cost(2);
11577   format %{ "CMPLW   $crx, $src1, $src2 \t// compressed ptr" %}
11578   ins_encode %{
11579     __ cmplw($crx$$CondRegister, $src1$$Register, $src2$$Register);
11580   %}
11581   ins_pipe(pipe_class_compare);
11582 %}
11583 
11584 instruct cmpN_reg_imm0(flagsReg crx, iRegNsrc src1, immN_0 src2) %{
11585   match(Set crx (CmpN src1 src2));
11586   // Make this more expensive than zeroCheckN_iReg_imm0.
11587   ins_cost(2);
11588 
11589   format %{ "CMPLWI  $crx, $src1, $src2 \t// compressed ptr" %}
11590   size(4);
11591   ins_encode %{
11592     __ cmplwi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11593   %}
11594   ins_pipe(pipe_class_compare);
11595 %}
11596 
11597 // Implicit zero checks (more implicit null checks).
11598 // No constant pool entries required.
11599 instruct zeroCheckP_reg_imm0(cmpOp cmp, iRegP_N2P value, immP_0 zero, label labl) %{
11600   match(If cmp (CmpP value zero));
11601   effect(USE labl);
11602   predicate(TrapBasedNullChecks &&
11603             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
11604             _leaf->as_If()->_prob >= PROB_LIKELY_MAG(4) &&
11605             Matcher::branches_to_uncommon_trap(_leaf));
11606   ins_cost(1); // Should not be cheaper than zeroCheckN.
11607 
11608   ins_is_TrapBasedCheckNode(true);
11609 
11610   format %{ "TDI     $value $cmp $zero \t// ZeroCheckP => trap $labl" %}
11611   size(4);
11612   ins_encode %{
11613     if ($cmp$$cmpcode == 0xA) {
11614       __ trap_null_check($value$$Register);
11615     } else {
11616       // Both successors are uncommon traps, probability is 0.
11617       // Node got flipped during fixup flow.
11618       assert($cmp$$cmpcode == 0x2 , "must be equal(0xA) or notEqual(0x2)");
11619       __ trap_null_check($value$$Register, Assembler::traptoGreaterThanUnsigned);
11620     }
11621   %}
11622   ins_pipe(pipe_class_trap);
11623 %}
11624 
11625 // Compare Pointers
11626 instruct cmpP_reg_reg(flagsReg crx, iRegP_N2P src1, iRegP_N2P src2) %{
11627   match(Set crx (CmpP src1 src2));
11628   format %{ "CMPLD   $crx, $src1, $src2 \t// ptr" %}
11629   size(4);
11630   ins_encode %{
11631     __ cmpld($crx$$CondRegister, $src1$$Register, $src2$$Register);
11632   %}
11633   ins_pipe(pipe_class_compare);
11634 %}
11635 
11636 instruct cmpP_reg_null(flagsReg crx, iRegP_N2P src1, immP_0or1 src2) %{
11637   match(Set crx (CmpP src1 src2));
11638   format %{ "CMPLDI   $crx, $src1, $src2 \t// ptr" %}
11639   size(4);
11640   ins_encode %{
11641     __ cmpldi($crx$$CondRegister, $src1$$Register, (int)((short)($src2$$constant & 0xFFFF)));
11642   %}
11643   ins_pipe(pipe_class_compare);
11644 %}
11645 
11646 // Used in postalloc expand.
11647 instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{
  // This match rule prevents reordering of the node before a safepoint.
  // This only makes sense if this instruction is used exclusively
  // for the expansion of EncodeP!
11651   match(Set crx (CmpP src1 src2));
11652   predicate(false);
11653 
11654   format %{ "CMPDI   $crx, $src1, $src2" %}
11655   size(4);
11656   ins_encode %{
11657     __ cmpdi($crx$$CondRegister, $src1$$Register, $src2$$constant);
11658   %}
11659   ins_pipe(pipe_class_compare);
11660 %}
11661 
11662 //----------Float Compares----------------------------------------------------
11663 
11664 instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
11665   // Needs matchrule, see cmpDUnordered.
11666   match(Set crx (CmpF src1 src2));
  // False predicate, shall not be matched directly (only created by the
  // cmpF_reg_reg_Ex postalloc expand below).
11668   predicate(false);
11669 
11670   format %{ "cmpFUrd $crx, $src1, $src2" %}
11671   size(4);
11672   ins_encode %{
11673     __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
11674   %}
11675   ins_pipe(pipe_class_default);
11676 %}
11677 
11678 instruct cmov_bns_less(flagsReg crx) %{
11679   // no match-rule, false predicate
11680   effect(DEF crx);
11681   predicate(false);
11682 
11683   ins_variable_size_depending_on_alignment(true);
11684 
11685   format %{ "cmov    $crx" %}
11686   // Worst case is branch + move + stop, no stop without scheduler.
11687   size(12);
11688   ins_encode %{
11689     Label done;
11690     __ bns($crx$$CondRegister, done);        // not unordered -> keep crx
11691     __ li(R0, 0);
11692     __ cmpwi($crx$$CondRegister, R0, 1);     // unordered -> set crx to 'less'
11693     __ bind(done);
11694   %}
11695   ins_pipe(pipe_class_default);
11696 %}
11697 
11698 // Compare floating, generate condition code.
11699 instruct cmpF_reg_reg_Ex(flagsReg crx, regF src1, regF src2) %{
  // FIXME: should we match '(If cmp (CmpF src1 src2))'?
11701   //
11702   // The following code sequence occurs a lot in mpegaudio:
11703   //
11704   // block BXX:
11705   // 0: instruct cmpFUnordered_reg_reg (cmpF_reg_reg-0):
11706   //    cmpFUrd CCR6, F11, F9
11707   // 4: instruct cmov_bns_less (cmpF_reg_reg-1):
11708   //    cmov CCR6
11709   // 8: instruct branchConSched:
11710   //    B_FARle CCR6, B56  P=0.500000 C=-1.000000
11711   match(Set crx (CmpF src1 src2));
11712   ins_cost(DEFAULT_COST+BRANCH_COST);
11713 
11714   format %{ "CmpF    $crx, $src1, $src2 \t// postalloc expanded" %}
11715   postalloc_expand %{
11716     //
11717     // replaces
11718     //
11719     //   region  src1  src2
11720     //    \       |     |
11721     //     crx=cmpF_reg_reg
11722     //
11723     // with
11724     //
11725     //   region  src1  src2
11726     //    \       |     |
11727     //     crx=cmpFUnordered_reg_reg
11728     //      |
11729     //      ^  region
11730     //      |   \
11731     //      crx=cmov_bns_less
11732     //
11733 
11734     // Create new nodes.
11735     MachNode *m1 = new cmpFUnordered_reg_regNode();
11736     MachNode *m2 = new cmov_bns_lessNode();
11737 
11738     // inputs for new nodes
11739     m1->add_req(n_region, n_src1, n_src2);
11740     m2->add_req(n_region);
11741     m2->add_prec(m1);
11742 
11743     // operands for new nodes
11744     m1->_opnds[0] = op_crx;
11745     m1->_opnds[1] = op_src1;
11746     m1->_opnds[2] = op_src2;
11747     m2->_opnds[0] = op_crx;
11748 
11749     // registers for new nodes
11750     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11751     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11752 
11753     // Insert new nodes.
11754     nodes->push(m1);
11755     nodes->push(m2);
11756   %}
11757 %}
11758 
11759 // Compare float, generate -1,0,1
11760 instruct cmpF3_reg_reg(iRegIdst dst, regF src1, regF src2, flagsRegCR0 cr0) %{
11761   match(Set dst (CmpF3 src1 src2));
11762   effect(KILL cr0);
11763   ins_cost(DEFAULT_COST * 6);
11764   size(VM_Version::has_brw() ? 20 : 24);
11765 
11766   format %{ "cmpF3_reg_reg $dst, $src1, $src2" %}
11767 
11768   ins_encode %{
11769     __ fcmpu(CCR0, $src1$$FloatRegister, $src2$$FloatRegister);
    __ set_cmpu3($dst$$Register, true); // C2 requires unordered to be treated as less.
11771   %}
11772   ins_pipe(pipe_class_default);
11773 %}
11774 
11775 instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
  // Needs a match rule so that the ideal opcode is Cmp. This makes GCM place the
  // node right before the conditional move that uses it.
  // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
  // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
  // crashed during register allocation when the flags register between cmpDUnordered
  // and a conditional move was supposed to be spilled.
11782   match(Set crx (CmpD src1 src2));
11783   // False predicate, shall not be matched.
11784   predicate(false);
11785 
11786   format %{ "cmpFUrd $crx, $src1, $src2" %}
11787   size(4);
11788   ins_encode %{
11789     __ fcmpu($crx$$CondRegister, $src1$$FloatRegister, $src2$$FloatRegister);
11790   %}
11791   ins_pipe(pipe_class_default);
11792 %}
11793 
11794 instruct cmpD_reg_reg_Ex(flagsReg crx, regD src1, regD src2) %{
11795   match(Set crx (CmpD src1 src2));
11796   ins_cost(DEFAULT_COST+BRANCH_COST);
11797 
11798   format %{ "CmpD    $crx, $src1, $src2 \t// postalloc expanded" %}
11799   postalloc_expand %{
11800     //
11801     // replaces
11802     //
11803     //   region  src1  src2
11804     //    \       |     |
11805     //     crx=cmpD_reg_reg
11806     //
11807     // with
11808     //
11809     //   region  src1  src2
11810     //    \       |     |
11811     //     crx=cmpDUnordered_reg_reg
11812     //      |
11813     //      ^  region
11814     //      |   \
11815     //      crx=cmov_bns_less
11816     //
11817 
11818     // create new nodes
11819     MachNode *m1 = new cmpDUnordered_reg_regNode();
11820     MachNode *m2 = new cmov_bns_lessNode();
11821 
11822     // inputs for new nodes
11823     m1->add_req(n_region, n_src1, n_src2);
11824     m2->add_req(n_region);
11825     m2->add_prec(m1);
11826 
11827     // operands for new nodes
11828     m1->_opnds[0] = op_crx;
11829     m1->_opnds[1] = op_src1;
11830     m1->_opnds[2] = op_src2;
11831     m2->_opnds[0] = op_crx;
11832 
11833     // registers for new nodes
11834     ra_->set_pair(m1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11835     ra_->set_pair(m2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); // crx
11836 
11837     // Insert new nodes.
11838     nodes->push(m1);
11839     nodes->push(m2);
11840   %}
11841 %}
11842 
11843 // Compare double, generate -1,0,1
11844 instruct cmpD3_reg_reg(iRegIdst dst, regD src1, regD src2, flagsRegCR0 cr0) %{
11845   match(Set dst (CmpD3 src1 src2));
11846   effect(KILL cr0);
11847   ins_cost(DEFAULT_COST * 6);
11848   size(VM_Version::has_brw() ? 20 : 24);
11849 
11850   format %{ "cmpD3_reg_reg $dst, $src1, $src2" %}
11851 
11852   ins_encode %{
11853     __ fcmpu(CCR0, $src1$$FloatRegister, $src2$$FloatRegister);
11854     __ set_cmpu3($dst$$Register, true); // C2 requires unordered to get treated like less
11855   %}
11856   ins_pipe(pipe_class_default);
11857 %}
11858 
11859 // Character-class compares (Digit, LowerCase, UpperCase, Whitespace)
11860 instruct cmprb_Digit_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11861   match(Set dst (Digit src1));
11862   effect(TEMP src2, TEMP crx);
11863   ins_cost(3 * DEFAULT_COST);
11864 
11865   format %{ "LI      $src2, 0x3930\n\t"
11866             "CMPRB   $crx, 0, $src1, $src2\n\t"
11867             "SETB    $dst, $crx" %}
11868   size(12);
11869   ins_encode %{
11870     // 0x30: 0, 0x39: 9
11871     __ li($src2$$Register, 0x3930);
11872     // compare src1 with ranges 0x30 to 0x39
11873     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11874     __ setb($dst$$Register, $crx$$CondRegister);
11875   %}
11876   ins_pipe(pipe_class_default);
11877 %}
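// Note on the CMPRB/SETB idiom used by the character-class instructs in this
// group (a sketch; see the Power ISA for the exact CR semantics): the GPR
// operand packs byte ranges as hi:lo pairs, e.g. 0x3930 encodes the single
// range 0x30..0x39, and 0xF6DFFFF8 encodes the two ranges 0xDF..0xF6 and
// 0xF8..0xFF. CMPRB checks the low byte of src1 against one range (L=0) or
// both ranges (L=1) and records the result in the crx field; SETB then
// materializes that field as an integer (1 if the byte matched, 0 otherwise).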
11878 
11879 instruct cmprb_LowerCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11880   match(Set dst (LowerCase src1));
11881   effect(TEMP src2, TEMP crx);
11882   ins_cost(12 * DEFAULT_COST);
11883 
11884   format %{ "LI      $src2, 0x7A61\n\t"
11885             "CMPRB   $crx, 0, $src1, $src2\n\t"
11886             "BGT     $crx, done\n\t"
11887             "LIS     $src2, (signed short)0xF6DF\n\t"
11888             "ORI     $src2, $src2, 0xFFF8\n\t"
11889             "CMPRB   $crx, 1, $src1, $src2\n\t"
11890             "BGT     $crx, done\n\t"
11891             "LIS     $src2, (signed short)0xAAB5\n\t"
11892             "ORI     $src2, $src2, 0xBABA\n\t"
11893             "INSRDI  $src2, $src2, 32, 0\n\t"
11894             "CMPEQB  $crx, 1, $src1, $src2\n"
11895             "done:\n\t"
11896             "SETB    $dst, $crx" %}
11897 
11898   size(48);
11899   ins_encode %{
11900     Label done;
11901     // 0x61: a, 0x7A: z
11902     __ li($src2$$Register, 0x7A61);
11903     // compare src1 with ranges 0x61 to 0x7A
11904     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11905     __ bgt($crx$$CondRegister, done);
11906 
11907     // 0xDF: sharp s, 0xFF: y with diaeresis; 0xF7 is not a lower case letter
11908     __ lis($src2$$Register, (signed short)0xF6DF);
11909     __ ori($src2$$Register, $src2$$Register, 0xFFF8);
11910     // compare src1 with ranges 0xDF to 0xF6 and 0xF8 to 0xFF
11911     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11912     __ bgt($crx$$CondRegister, done);
11913 
11914     // 0xAA: feminine ordinal indicator
11915     // 0xB5: micro sign
11916     // 0xBA: masculine ordinal indicator
11917     __ lis($src2$$Register, (signed short)0xAAB5);
11918     __ ori($src2$$Register, $src2$$Register, 0xBABA);
11919     __ insrdi($src2$$Register, $src2$$Register, 32, 0);
11920     // compare src1 with 0xAA, 0xB5, and 0xBA
11921     __ cmpeqb($crx$$CondRegister, $src1$$Register, $src2$$Register);
11922 
11923     __ bind(done);
11924     __ setb($dst$$Register, $crx$$CondRegister);
11925   %}
11926   ins_pipe(pipe_class_default);
11927 %}
11928 
11929 instruct cmprb_UpperCase_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11930   match(Set dst (UpperCase src1));
11931   effect(TEMP src2, TEMP crx);
11932   ins_cost(7 * DEFAULT_COST);
11933 
11934   format %{ "LI      $src2, 0x5A41\n\t"
11935             "CMPRB   $crx, 0, $src1, $src2\n\t"
11936             "BGT     $crx, done\n\t"
11937             "LIS     $src2, (signed short)0xD6C0\n\t"
11938             "ORI     $src2, $src2, 0xDED8\n\t"
11939             "CMPRB   $crx, 1, $src1, $src2\n"
11940             "done:\n\t"
11941             "SETB    $dst, $crx" %}
11942 
11943   size(28);
11944   ins_encode %{
11945     Label done;
11946     // 0x41: A, 0x5A: Z
11947     __ li($src2$$Register, 0x5A41);
11948     // compare src1 with a range 0x41 to 0x5A
11949     __ cmprb($crx$$CondRegister, 0, $src1$$Register, $src2$$Register);
11950     __ bgt($crx$$CondRegister, done);
11951 
11952     // 0xC0: A with grave, 0xDE: thorn; 0xD7 is not an upper case letter
11953     __ lis($src2$$Register, (signed short)0xD6C0);
11954     __ ori($src2$$Register, $src2$$Register, 0xDED8);
11955     // compare src1 with ranges 0xC0 to 0xD6 and 0xD8 to 0xDE
11956     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11957 
11958     __ bind(done);
11959     __ setb($dst$$Register, $crx$$CondRegister);
11960   %}
11961   ins_pipe(pipe_class_default);
11962 %}
11963 
11964 instruct cmprb_Whitespace_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11965   match(Set dst (Whitespace src1));
11966   predicate(PowerArchitecturePPC64 <= 9);
11967   effect(TEMP src2, TEMP crx);
11968   ins_cost(4 * DEFAULT_COST);
11969 
11970   format %{ "LI      $src2, 0x0D09\n\t"
11971             "ADDIS   $src2, 0x201C\n\t"
11972             "CMPRB   $crx, 1, $src1, $src2\n\t"
11973             "SETB    $dst, $crx" %}
11974   size(16);
11975   ins_encode %{
11976     // 0x09 to 0x0D, 0x1C to 0x20
11977     __ li($src2$$Register, 0x0D09);
11978     __ addis($src2$$Register, $src2$$Register, 0x0201C);
11979     // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
11980     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
11981     __ setb($dst$$Register, $crx$$CondRegister);
11982   %}
11983   ins_pipe(pipe_class_default);
11984 %}
11985 
11986 // Power 10 version, using prefixed addi to load 32-bit constant
11987 instruct cmprb_Whitespace_reg_reg_prefixed(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsReg crx) %{
11988   match(Set dst (Whitespace src1));
11989   predicate(PowerArchitecturePPC64 >= 10);
11990   effect(TEMP src2, TEMP crx);
11991   ins_cost(3 * DEFAULT_COST);
11992 
11993   format %{ "PLI     $src2, 0x201C0D09\n\t"
11994             "CMPRB   $crx, 1, $src1, $src2\n\t"
11995             "SETB    $dst, $crx" %}
11996   size(16);
11997   ins_encode %{
11998     // 0x09 to 0x0D, 0x1C to 0x20
11999     assert( ((intptr_t)(__ pc()) & 0x3c) != 0x3c, "Bad alignment for prefixed instruction at " INTPTR_FORMAT, (intptr_t)(__ pc()));
12000     __ pli($src2$$Register, 0x201C0D09);
12001     // compare src with ranges 0x09 to 0x0D and 0x1C to 0x20
12002     __ cmprb($crx$$CondRegister, 1, $src1$$Register, $src2$$Register);
12003     __ setb($dst$$Register, $crx$$CondRegister);
12004   %}
12005   ins_pipe(pipe_class_default);
12006   ins_alignment(2);
12007 %}
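// Note: PLI is a prefixed (8-byte) instruction and, per Power ISA 3.1, a
// prefixed instruction must not cross a 64-byte boundary. The assert above
// checks that the prefix does not start in the last word of a 64-byte block;
// ins_alignment(2) requests the extra alignment that lets the emitter
// guarantee this.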
12008 
12009 //----------Branches---------------------------------------------------------
12010 // Jump
12011 
12012 // Direct Branch.
12013 instruct branch(label labl) %{
12014   match(Goto);
12015   effect(USE labl);
12016   ins_cost(BRANCH_COST);
12017 
12018   format %{ "B       $labl" %}
12019   size(4);
12020   ins_encode %{
12021      Label d;    // dummy
12022      __ bind(d);
12023      Label* p = $labl$$label;
12024      // `p' is `NULL' when this encoding class is used only to
12025      // determine the size of the encoded instruction.
12026      Label& l = (NULL == p)? d : *(p);
12027      __ b(l);
12028   %}
12029   ins_pipe(pipe_class_default);
12030 %}
12031 
12032 // Conditional Near Branch
12033 instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{
12034   // Same match rule as `branchConFar'.
12035   match(If cmp crx);
12036   effect(USE lbl);
12037   ins_cost(BRANCH_COST);
12038 
12039   // If set to 1 this indicates that the current instruction is a
12040   // short variant of a long branch. This avoids using this
12041   // instruction in first-pass matching. It will then only be used in
12042   // the `Shorten_branches' pass.
12043   ins_short_branch(1);
12044 
12045   format %{ "B$cmp     $crx, $lbl" %}
12046   size(4);
12047   ins_encode( enc_bc(crx, cmp, lbl) );
12048   ins_pipe(pipe_class_default);
12049 %}
12050 
12051 // This is for cases when the ppc64 `bc' instruction does not
12052 // reach far enough. So we emit a far branch here, which is more
12053 // expensive.
12054 //
12055 // Conditional Far Branch
12056 instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{
12057   // Same match rule as `branchCon'.
12058   match(If cmp crx);
12059   effect(USE crx, USE lbl);
12060   // Higher cost than `branchCon'.
12061   ins_cost(5*BRANCH_COST);
12062 
12063   // This is not a short variant of a branch, but the long variant.
12064   ins_short_branch(0);
12065 
12066   format %{ "B_FAR$cmp $crx, $lbl" %}
12067   size(8);
12068   ins_encode( enc_bc_far(crx, cmp, lbl) );
12069   ins_pipe(pipe_class_default);
12070 %}
12071 
12072 instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{
12073   match(CountedLoopEnd cmp crx);
12074   effect(USE labl);
12075   ins_cost(BRANCH_COST);
12076 
12077   // short variant.
12078   ins_short_branch(1);
12079 
12080   format %{ "B$cmp     $crx, $labl \t// counted loop end" %}
12081   size(4);
12082   ins_encode( enc_bc(crx, cmp, labl) );
12083   ins_pipe(pipe_class_default);
12084 %}
12085 
12086 instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{
12087   match(CountedLoopEnd cmp crx);
12088   effect(USE labl);
12089   ins_cost(BRANCH_COST);
12090 
12091   // Long variant.
12092   ins_short_branch(0);
12093 
12094   format %{ "B_FAR$cmp $crx, $labl \t// counted loop end" %}
12095   size(8);
12096   ins_encode( enc_bc_far(crx, cmp, labl) );
12097   ins_pipe(pipe_class_default);
12098 %}
12099 
12100 // ============================================================================
12101 // Java runtime operations, intrinsics and other complex operations.
12102 
12103 // The second (slow) half of a subtype check. Scan the subklass's secondary superklass
12104 // array for an instance of the superklass. Set a hidden internal cache on a
12105 // hit (the cache is checked with exposed code in gen_subtype_check()). Returns
12106 // non-zero for a miss and zero for a hit. The encoding ALSO sets flags.
12107 //
12108 // GL TODO: Improve this.
12109 // - result should not be a TEMP
12110 // - Add a match rule as on sparc that avoids the additional Cmp.
12111 instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P superklass,
12112                              iRegPdst tmp_klass, iRegPdst tmp_arrayptr) %{
12113   match(Set result (PartialSubtypeCheck subklass superklass));
12114   effect(TEMP_DEF result, TEMP tmp_klass, TEMP tmp_arrayptr);
12115   ins_cost(DEFAULT_COST*10);
12116 
12117   format %{ "PartialSubtypeCheck $result = ($subklass instanceOf $superklass) tmp: $tmp_klass, $tmp_arrayptr" %}
12118   ins_encode %{
12119     __ check_klass_subtype_slow_path($subklass$$Register, $superklass$$Register, $tmp_arrayptr$$Register,
12120                                      $tmp_klass$$Register, NULL, $result$$Register);
12121   %}
12122   ins_pipe(pipe_class_default);
12123 %}
12124 
12125 // inlined locking and unlocking
12126 
12127 instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
12128   match(Set crx (FastLock oop box));
12129   effect(TEMP tmp1, TEMP tmp2);
12130   predicate(!Compile::current()->use_rtm());
12131 
12132   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2" %}
12133   ins_encode %{
12134     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12135                                  $tmp1$$Register, $tmp2$$Register, /*tmp3*/ R0,
12136                                  UseBiasedLocking && !UseOptoBiasInlining);
12137     // If locking was successful, crx should indicate 'EQ'.
12138     // The compiler generates a branch to the runtime call to
12139     // _complete_monitor_locking_Java for the case where crx is 'NE'.
12140   %}
12141   ins_pipe(pipe_class_compare);
12142 %}
12143 
12144 // Separate version for TM. Use bound register for box to enable USE_KILL.
12145 instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12146   match(Set crx (FastLock oop box));
12147   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box);
12148   predicate(Compile::current()->use_rtm());
12149 
12150   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %}
12151   ins_encode %{
12152     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12153                                  $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12154                                  /*Biased Locking*/ false,
12155                                  _rtm_counters, _stack_rtm_counters,
12156                                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12157                                  /*TM*/ true, ra_->C->profile_rtm());
12158     // If locking was successful, crx should indicate 'EQ'.
12159     // The compiler generates a branch to the runtime call to
12160     // _complete_monitor_locking_Java for the case where crx is 'NE'.
12161   %}
12162   ins_pipe(pipe_class_compare);
12163 %}
12164 
12165 instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12166   match(Set crx (FastUnlock oop box));
12167   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
12168   predicate(!Compile::current()->use_rtm());
12169 
12170   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2" %}
12171   ins_encode %{
12172     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12173                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12174                                    UseBiasedLocking && !UseOptoBiasInlining,
12175                                    false);
12176     // If unlocking was successful, crx should indicate 'EQ'.
12177     // The compiler generates a branch to the runtime call to
12178     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
12179   %}
12180   ins_pipe(pipe_class_compare);
12181 %}
12182 
12183 instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
12184   match(Set crx (FastUnlock oop box));
12185   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
12186   predicate(Compile::current()->use_rtm());
12187 
12188   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2 (TM)" %}
12189   ins_encode %{
12190     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
12191                                    $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
12192                                    /*Biased Locking*/ false, /*TM*/ true);
12193     // If unlocking was successful, crx should indicate 'EQ'.
12194     // The compiler generates a branch to the runtime call to
12195     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
12196   %}
12197   ins_pipe(pipe_class_compare);
12198 %}
12199 
12200 // Align address.
12201 instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{
12202   match(Set dst (CastX2P (AndL (CastP2X src) mask)));
12203 
12204   format %{ "ANDDI   $dst, $src, $mask \t// next aligned address" %}
12205   size(4);
12206   ins_encode %{
12207     __ clrrdi($dst$$Register, $src$$Register, log2i_exact(-(julong)$mask$$constant));
12208   %}
12209   ins_pipe(pipe_class_default);
12210 %}
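// Worked example for the encoding above (a sketch): with mask == -8
// (0x...FFF8), -(julong)mask == 8 and log2i_exact(8) == 3, so clrrdi clears
// the low 3 bits of $src. Because the mask is a negative power of 2, this is
// exactly the AndL from the match rule without an explicit AND instruction.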
12211 
12212 // Array size computation.
12213 instruct array_size(iRegLdst dst, iRegPsrc end, iRegPsrc start) %{
12214   match(Set dst (SubL (CastP2X end) (CastP2X start)));
12215 
12216   format %{ "SUB     $dst, $end, $start \t// array size in bytes" %}
12217   size(4);
12218   ins_encode %{
12219     __ subf($dst$$Register, $start$$Register, $end$$Register);
12220   %}
12221   ins_pipe(pipe_class_default);
12222 %}
12223 
12224 // Clear-array with constant short array length. The versions below can use dcbz with cnt > 30.
12225 instruct inlineCallClearArrayShort(immLmax30 cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
12226   match(Set dummy (ClearArray cnt base));
12227   effect(USE_KILL base, KILL ctr);
12228   ins_cost(2 * MEMORY_REF_COST);
12229 
12230   format %{ "ClearArray $cnt, $base" %}
12231   ins_encode %{
12232     __ clear_memory_constlen($base$$Register, $cnt$$constant, R0); // kills base, R0
12233   %}
12234   ins_pipe(pipe_class_default);
12235 %}
12236 
12237 // Clear-array with constant large array length.
12238 instruct inlineCallClearArrayLarge(immL cnt, rarg2RegP base, Universe dummy, iRegLdst tmp, regCTR ctr) %{
12239   match(Set dummy (ClearArray cnt base));
12240   effect(USE_KILL base, TEMP tmp, KILL ctr);
12241   ins_cost(3 * MEMORY_REF_COST);
12242 
12243   format %{ "ClearArray $cnt, $base \t// KILL $tmp" %}
12244   ins_encode %{
12245     __ clear_memory_doubleword($base$$Register, $tmp$$Register, R0, $cnt$$constant); // kills base, R0
12246   %}
12247   ins_pipe(pipe_class_default);
12248 %}
12249 
12250 // Clear-array with dynamic array length.
12251 instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
12252   match(Set dummy (ClearArray cnt base));
12253   effect(USE_KILL cnt, USE_KILL base, KILL ctr);
12254   ins_cost(4 * MEMORY_REF_COST);
12255 
12256   format %{ "ClearArray $cnt, $base" %}
12257   ins_encode %{
12258     __ clear_memory_doubleword($base$$Register, $cnt$$Register, R0); // kills cnt, base, R0
12259   %}
12260   ins_pipe(pipe_class_default);
12261 %}
12262 
12263 instruct string_compareL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12264                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12265   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
12266   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12267   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12268   ins_cost(300);
12269   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12270   ins_encode %{
12271     __ string_compare($str1$$Register, $str2$$Register,
12272                       $cnt1$$Register, $cnt2$$Register,
12273                       $tmp$$Register,
12274                       $result$$Register, StrIntrinsicNode::LL);
12275   %}
12276   ins_pipe(pipe_class_default);
12277 %}
12278 
12279 instruct string_compareU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12280                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12281   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
12282   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12283   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12284   ins_cost(300);
12285   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12286   ins_encode %{
12287     __ string_compare($str1$$Register, $str2$$Register,
12288                       $cnt1$$Register, $cnt2$$Register,
12289                       $tmp$$Register,
12290                       $result$$Register, StrIntrinsicNode::UU);
12291   %}
12292   ins_pipe(pipe_class_default);
12293 %}
12294 
12295 instruct string_compareLU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12296                           iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12297   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
12298   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12299   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12300   ins_cost(300);
12301   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12302   ins_encode %{
12303     __ string_compare($str1$$Register, $str2$$Register,
12304                       $cnt1$$Register, $cnt2$$Register,
12305                       $tmp$$Register,
12306                       $result$$Register, StrIntrinsicNode::LU);
12307   %}
12308   ins_pipe(pipe_class_default);
12309 %}
12310 
12311 instruct string_compareUL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
12312                           iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12313   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
12314   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12315   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
12316   ins_cost(300);
12317   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
12318   ins_encode %{
12319     __ string_compare($str2$$Register, $str1$$Register,
12320                       $cnt2$$Register, $cnt1$$Register,
12321                       $tmp$$Register,
12322                       $result$$Register, StrIntrinsicNode::UL);
12323   %}
12324   ins_pipe(pipe_class_default);
12325 %}
12326 
12327 instruct string_equalsL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
12328                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12329   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
12330   match(Set result (StrEquals (Binary str1 str2) cnt));
12331   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
12332   ins_cost(300);
12333   format %{ "String Equals byte[] $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
12334   ins_encode %{
12335     __ array_equals(false, $str1$$Register, $str2$$Register,
12336                     $cnt$$Register, $tmp$$Register,
12337                     $result$$Register, true /* byte */);
12338   %}
12339   ins_pipe(pipe_class_default);
12340 %}
12341 
12342 instruct string_equalsU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
12343                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
12344   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
12345   match(Set result (StrEquals (Binary str1 str2) cnt));
12346   effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
12347   ins_cost(300);
12348   format %{ "String Equals char[]  $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
12349   ins_encode %{
12350     __ array_equals(false, $str1$$Register, $str2$$Register,
12351                     $cnt$$Register, $tmp$$Register,
12352                     $result$$Register, false /* byte */);
12353   %}
12354   ins_pipe(pipe_class_default);
12355 %}
12356 
12357 instruct array_equalsB(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
12358                        iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR0 cr1) %{
12359   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12360   match(Set result (AryEq ary1 ary2));
12361   effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
12362   ins_cost(300);
12363   format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
12364   ins_encode %{
12365     __ array_equals(true, $ary1$$Register, $ary2$$Register,
12366                     $tmp1$$Register, $tmp2$$Register,
12367                     $result$$Register, true /* byte */);
12368   %}
12369   ins_pipe(pipe_class_default);
12370 %}
12371 
12372 instruct array_equalsC(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
12373                        iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR0 cr1) %{
12374   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12375   match(Set result (AryEq ary1 ary2));
12376   effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
12377   ins_cost(300);
12378   format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
12379   ins_encode %{
12380     __ array_equals(true, $ary1$$Register, $ary2$$Register,
12381                     $tmp1$$Register, $tmp2$$Register,
12382                     $result$$Register, false /* byte */);
12383   %}
12384   ins_pipe(pipe_class_default);
12385 %}
12386 
12387 instruct indexOf_imm1_char_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12388                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
12389                              iRegIdst tmp1, iRegIdst tmp2,
12390                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12391   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12392   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12393   // Required for EA: check if it is still a type_array.
12394   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
12395   ins_cost(150);
12396 
12397   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12398             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12399 
12400   ins_encode %{
12401     immPOper *needleOper = (immPOper *)$needleImm;
12402     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12403     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12404     jchar chr;
12405 #ifdef VM_LITTLE_ENDIAN
12406     chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
12407            ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
12408 #else
12409     chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
12410            ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
12411 #endif
12412     __ string_indexof_char($result$$Register,
12413                            $haystack$$Register, $haycnt$$Register,
12414                            R0, chr,
12415                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12416   %}
12417   ins_pipe(pipe_class_compare);
12418 %}
12419 
12420 instruct indexOf_imm1_char_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12421                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
12422                              iRegIdst tmp1, iRegIdst tmp2,
12423                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12424   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12425   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12426   // Required for EA: check if it is still a type_array.
12427   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
12428   ins_cost(150);
12429 
12430   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12431             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12432 
12433   ins_encode %{
12434     immPOper *needleOper = (immPOper *)$needleImm;
12435     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12436     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12437     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12438     __ string_indexof_char($result$$Register,
12439                            $haystack$$Register, $haycnt$$Register,
12440                            R0, chr,
12441                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12442   %}
12443   ins_pipe(pipe_class_compare);
12444 %}
12445 
12446 instruct indexOf_imm1_char_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12447                               immP needleImm, immL offsetImm, immI_1 needlecntImm,
12448                               iRegIdst tmp1, iRegIdst tmp2,
12449                               flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12450   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
12451   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12452   // Required for EA: check if it is still a type_array.
12453   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
12454   ins_cost(150);
12455 
12456   format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
12457             "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12458 
12459   ins_encode %{
12460     immPOper *needleOper = (immPOper *)$needleImm;
12461     const TypeOopPtr *t = needleOper->type()->isa_oopptr();
12462     ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
12463     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12464     __ string_indexof_char($result$$Register,
12465                            $haystack$$Register, $haycnt$$Register,
12466                            R0, chr,
12467                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12468   %}
12469   ins_pipe(pipe_class_compare);
12470 %}
12471 
12472 instruct indexOf_imm1_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12473                         rscratch2RegP needle, immI_1 needlecntImm,
12474                         iRegIdst tmp1, iRegIdst tmp2,
12475                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12476   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12477   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12478   // Required for EA: check if it is still a type_array.
12479   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
12480             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12481             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12482   ins_cost(180);
12483 
12484   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12485             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12486   ins_encode %{
12487     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12488     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12489     guarantee(needle_values, "sanity");
12490     jchar chr;
12491 #ifdef VM_LITTLE_ENDIAN
12492     chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
12493            ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
12494 #else
12495     chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
12496            ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
12497 #endif
12498     __ string_indexof_char($result$$Register,
12499                            $haystack$$Register, $haycnt$$Register,
12500                            R0, chr,
12501                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12502   %}
12503   ins_pipe(pipe_class_compare);
12504 %}
12505 
12506 instruct indexOf_imm1_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12507                         rscratch2RegP needle, immI_1 needlecntImm,
12508                         iRegIdst tmp1, iRegIdst tmp2,
12509                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12510   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12511   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12512   // Required for EA: check if it is still a type_array.
12513   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
12514             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12515             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12516   ins_cost(180);
12517 
12518   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12519             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12520   ins_encode %{
12521     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12522     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12523     guarantee(needle_values, "sanity");
12524     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12525     __ string_indexof_char($result$$Register,
12526                            $haystack$$Register, $haycnt$$Register,
12527                            R0, chr,
12528                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12529   %}
12530   ins_pipe(pipe_class_compare);
12531 %}
12532 
12533 instruct indexOf_imm1_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12534                          rscratch2RegP needle, immI_1 needlecntImm,
12535                          iRegIdst tmp1, iRegIdst tmp2,
12536                          flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12537   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12538   effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12539   // Required for EA: check if it is still a type_array.
12540   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
12541             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12542             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12543   ins_cost(180);
12544 
12545   format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12546             " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
12547   ins_encode %{
12548     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12549     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12550     guarantee(needle_values, "sanity");
12551     jchar chr = (jchar)needle_values->element_value(0).as_byte();
12552     __ string_indexof_char($result$$Register,
12553                            $haystack$$Register, $haycnt$$Register,
12554                            R0, chr,
12555                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12556   %}
12557   ins_pipe(pipe_class_compare);
12558 %}
12559 
12560 instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12561                        iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
12562                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12563   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
12564   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12565   predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
12566   ins_cost(180);
12567 
12568   format %{ "StringUTF16 IndexOfChar $haystack[0..$haycnt], $ch"
12569             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12570   ins_encode %{
12571     __ string_indexof_char($result$$Register,
12572                            $haystack$$Register, $haycnt$$Register,
12573                            $ch$$Register, 0 /* this is not used if the character is already in a register */,
12574                            $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
12575   %}
12576   ins_pipe(pipe_class_compare);
12577 %}
12578 
12579 instruct indexOfChar_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
12580                        iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
12581                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
12582   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
12583   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
12584   predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
12585   ins_cost(180);
12586 
12587   format %{ "StringLatin1 IndexOfChar $haystack[0..$haycnt], $ch"
12588             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
12589   ins_encode %{
12590     __ string_indexof_char($result$$Register,
12591                            $haystack$$Register, $haycnt$$Register,
12592                            $ch$$Register, 0 /* this is not used if the character is already in a register */,
12593                            $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
12594   %}
12595   ins_pipe(pipe_class_compare);
12596 %}
12597 
12598 instruct indexOf_imm_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12599                        iRegPsrc needle, uimmI15 needlecntImm,
12600                        iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12601                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12602   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12603   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12604          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12605   // Required for EA: check if it is still a type_array.
12606   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
12607             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12608             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12609   ins_cost(250);
12610 
12611   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12612             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12613   ins_encode %{
12614     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12615     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12616 
12617     __ string_indexof($result$$Register,
12618                       $haystack$$Register, $haycnt$$Register,
12619                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12620                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
12621   %}
12622   ins_pipe(pipe_class_compare);
12623 %}
12624 
12625 instruct indexOf_imm_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12626                        iRegPsrc needle, uimmI15 needlecntImm,
12627                        iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12628                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12629   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12630   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12631          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12632   // Required for EA: check if it is still a type_array.
12633   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
12634             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12635             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12636   ins_cost(250);
12637 
12638   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12639             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12640   ins_encode %{
12641     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12642     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12643 
12644     __ string_indexof($result$$Register,
12645                       $haystack$$Register, $haycnt$$Register,
12646                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12647                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
12648   %}
12649   ins_pipe(pipe_class_compare);
12650 %}
12651 
12652 instruct indexOf_imm_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
12653                         iRegPsrc needle, uimmI15 needlecntImm,
12654                         iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
12655                         flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12656   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
12657   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
12658          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12659   // Required for EA: check if it is still a type_array.
12660   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
12661             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
12662             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
12663   ins_cost(250);
12664 
12665   format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
12666             " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
12667   ins_encode %{
12668     Node *ndl = in(operand_index($needle));  // The node that defines needle.
12669     ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
12670 
12671     __ string_indexof($result$$Register,
12672                       $haystack$$Register, $haycnt$$Register,
12673                       $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
12674                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
12675   %}
12676   ins_pipe(pipe_class_compare);
12677 %}
12678 
12679 instruct indexOf_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12680                    iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12681                    flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12682   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12683   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12684          TEMP_DEF result,
12685          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12686   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
12687   ins_cost(300);
12688 
12689   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12690              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12691   ins_encode %{
12692     __ string_indexof($result$$Register,
12693                       $haystack$$Register, $haycnt$$Register,
12694                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12695                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
12696   %}
12697   ins_pipe(pipe_class_compare);
12698 %}
12699 
12700 instruct indexOf_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12701                    iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12702                    flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12703   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12704   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12705          TEMP_DEF result,
12706          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12707   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
12708   ins_cost(300);
12709 
12710   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12711              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12712   ins_encode %{
12713     __ string_indexof($result$$Register,
12714                       $haystack$$Register, $haycnt$$Register,
12715                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12716                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
12717   %}
12718   ins_pipe(pipe_class_compare);
12719 %}
12720 
12721 instruct indexOf_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
12722                     iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
12723                     flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
12724   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
12725   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
12726          TEMP_DEF result,
12727          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
12728   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
12729   ins_cost(300);
12730 
12731   format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
12732              " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
12733   ins_encode %{
12734     __ string_indexof($result$$Register,
12735                       $haystack$$Register, $haycnt$$Register,
12736                       $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
12737                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
12738   %}
12739   ins_pipe(pipe_class_compare);
12740 %}
12741 
12742 // char[] to byte[] compression
12743 instruct string_compress(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
12744                          iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12745   match(Set result (StrCompressedCopy src (Binary dst len)));
12746   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
12747          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12748   ins_cost(300);
12749   format %{ "String Compress $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12750   ins_encode %{
12751     Label Lskip, Ldone;
12752     __ li($result$$Register, 0);
12753     __ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12754                           $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Ldone);
12755     __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12756     __ beq(CCR0, Lskip);
12757     __ string_compress($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register, Ldone);
12758     __ bind(Lskip);
12759     __ mr($result$$Register, $len$$Register);
12760     __ bind(Ldone);
12761   %}
12762   ins_pipe(pipe_class_default);
12763 %}
12764 
12765 // byte[] to char[] inflation
12766 instruct string_inflate(Universe dummy, rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegLdst tmp1,
12767                         iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12768   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12769   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12770   ins_cost(300);
12771   format %{ "String Inflate $src,$dst,$len \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12772   ins_encode %{
12773     Label Ldone;
12774     __ string_inflate_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
12775                          $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register);
12776     __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
12777     __ beq(CCR0, Ldone);
12778     __ string_inflate($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register);
12779     __ bind(Ldone);
12780   %}
12781   ins_pipe(pipe_class_default);
12782 %}
12783 
12784 // StringCoding.java intrinsics
12785 instruct has_negatives(rarg1RegP ary1, iRegIsrc len, iRegIdst result, iRegLdst tmp1, iRegLdst tmp2,
12786                        regCTR ctr, flagsRegCR0 cr0)
12787 %{
12788   match(Set result (HasNegatives ary1 len));
12789   effect(TEMP_DEF result, USE_KILL ary1, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0);
12790   ins_cost(300);
12791   format %{ "has negatives byte[] $ary1,$len -> $result \t// KILL $tmp1, $tmp2" %}
12792   ins_encode %{
12793     __ has_negatives($ary1$$Register, $len$$Register, $result$$Register,
12794                      $tmp1$$Register, $tmp2$$Register);
12795   %}
12796   ins_pipe(pipe_class_default);
12797 %}
12798 
12799 // encode char[] to byte[] in ISO_8859_1
12800 instruct encode_iso_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
12801                           iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12802   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12803   match(Set result (EncodeISOArray src (Binary dst len)));
12804   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
12805          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12806   ins_cost(300);
12807   format %{ "Encode iso array $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12808   ins_encode %{
12809     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register, $tmp2$$Register,
12810                         $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, $result$$Register, false);
12811   %}
12812   ins_pipe(pipe_class_default);
12813 %}
12814 
12815 // encode char[] to byte[] in ASCII
12816 instruct encode_ascii_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
12817                           iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
12818   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12819   match(Set result (EncodeISOArray src (Binary dst len)));
12820   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
12821          USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
12822   ins_cost(300);
12823   format %{ "Encode ascii array $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
12824   ins_encode %{
12825     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register, $tmp2$$Register,
12826                         $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, $result$$Register, true);
12827   %}
12828   ins_pipe(pipe_class_default);
12829 %}
12830 
12831 
12832 //---------- Min/Max Instructions ---------------------------------------------
12833 
12834 instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
12835   match(Set dst (MinI src1 src2));
12836   ins_cost(DEFAULT_COST*6);
12837 
12838   expand %{
12839     iRegLdst src1s;
12840     iRegLdst src2s;
12841     iRegLdst diff;
12842     iRegLdst sm;
12843     iRegLdst doz; // difference or zero
12844     convI2L_reg(src1s, src1); // Ensure proper sign extension.
12845     convI2L_reg(src2s, src2); // Ensure proper sign extension.
12846     subL_reg_reg(diff, src2s, src1s);
12847     // Need to consider >=33 bit result, therefore we need signmaskL.
12848     signmask64L_regL(sm, diff);
12849     andL_reg_reg(doz, diff, sm); // <=0
12850     addI_regL_regL(dst, doz, src1s);
12851   %}
12852 %}
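// How the branch-free MinI expansion above works (a sketch): diff = src2 - src1
// is computed in 64 bits, so it cannot overflow; sm is the sign mask of diff
// (all ones iff diff < 0), hence doz = diff & sm = min(diff, 0) and
// dst = src1 + min(src2 - src1, 0) = min(src1, src2).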
12853 
12854 instruct minI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
12855   match(Set dst (MinI src1 src2));
12856   effect(KILL cr0);
12857   predicate(VM_Version::has_isel());
12858   ins_cost(DEFAULT_COST*2);
12859 
12860   ins_encode %{
12861     __ cmpw(CCR0, $src1$$Register, $src2$$Register);
12862     __ isel($dst$$Register, CCR0, Assembler::less, /*invert*/false, $src1$$Register, $src2$$Register);
12863   %}
12864   ins_pipe(pipe_class_default);
12865 %}
12866 
12867 instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
12868   match(Set dst (MaxI src1 src2));
12869   ins_cost(DEFAULT_COST*6);
12870 
12871   expand %{
12872     iRegLdst src1s;
12873     iRegLdst src2s;
12874     iRegLdst diff;
12875     iRegLdst sm;
12876     iRegLdst doz; // difference or zero
12877     convI2L_reg(src1s, src1); // Ensure proper sign extension.
12878     convI2L_reg(src2s, src2); // Ensure proper sign extension.
12879     subL_reg_reg(diff, src2s, src1s);
12880     // Need to consider >=33 bit result, therefore we need signmaskL.
12881     signmask64L_regL(sm, diff);
12882     andcL_reg_reg(doz, diff, sm); // >=0
12883     addI_regL_regL(dst, doz, src1s);
12884   %}
12885 %}
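// Same derivation as MinI above, but ANDC keeps doz = diff & ~sm = max(diff, 0),
// so dst = src1 + max(src2 - src1, 0) = max(src1, src2).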
12886 
12887 instruct maxI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
12888   match(Set dst (MaxI src1 src2));
12889   effect(KILL cr0);
12890   predicate(VM_Version::has_isel());
12891   ins_cost(DEFAULT_COST*2);
12892 
12893   ins_encode %{
12894     __ cmpw(CCR0, $src1$$Register, $src2$$Register);
12895     __ isel($dst$$Register, CCR0, Assembler::greater, /*invert*/false, $src1$$Register, $src2$$Register);
12896   %}
12897   ins_pipe(pipe_class_default);
12898 %}
12899 
12900 //---------- Population Count Instructions ------------------------------------
12901 
12902 // Popcnt for Power7.
12903 instruct popCountI(iRegIdst dst, iRegIsrc src) %{
12904   match(Set dst (PopCountI src));
12905   predicate(UsePopCountInstruction && VM_Version::has_popcntw());
12906   ins_cost(DEFAULT_COST);
12907 
12908   format %{ "POPCNTW $dst, $src" %}
12909   size(4);
12910   ins_encode %{
12911     __ popcntw($dst$$Register, $src$$Register);
12912   %}
12913   ins_pipe(pipe_class_default);
12914 %}
12915 
12916 // Popcnt for Power7.
12917 instruct popCountL(iRegIdst dst, iRegLsrc src) %{
12918   predicate(UsePopCountInstruction && VM_Version::has_popcntw());
12919   match(Set dst (PopCountL src));
12920   ins_cost(DEFAULT_COST);
12921 
12922   format %{ "POPCNTD $dst, $src" %}
12923   size(4);
12924   ins_encode %{
12925     __ popcntd($dst$$Register, $src$$Register);
12926   %}
12927   ins_pipe(pipe_class_default);
12928 %}
12929 
12930 instruct countLeadingZerosI(iRegIdst dst, iRegIsrc src) %{
12931   match(Set dst (CountLeadingZerosI src));
12932   predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
12933   ins_cost(DEFAULT_COST);
12934 
12935   format %{ "CNTLZW  $dst, $src" %}
12936   size(4);
12937   ins_encode %{
12938     __ cntlzw($dst$$Register, $src$$Register);
12939   %}
12940   ins_pipe(pipe_class_default);
12941 %}
12942 
12943 instruct countLeadingZerosL(iRegIdst dst, iRegLsrc src) %{
12944   match(Set dst (CountLeadingZerosL src));
12945   predicate(UseCountLeadingZerosInstructionsPPC64);  // See Matcher::match_rule_supported.
12946   ins_cost(DEFAULT_COST);
12947 
12948   format %{ "CNTLZD  $dst, $src" %}
12949   size(4);
12950   ins_encode %{
12951     __ cntlzd($dst$$Register, $src$$Register);
12952   %}
12953   ins_pipe(pipe_class_default);
12954 %}
12955 
12956 instruct countLeadingZerosP(iRegIdst dst, iRegPsrc src) %{
12957   // no match-rule, false predicate
12958   effect(DEF dst, USE src);
12959   predicate(false);
12960 
12961   format %{ "CNTLZD  $dst, $src" %}
12962   size(4);
12963   ins_encode %{
12964     __ cntlzd($dst$$Register, $src$$Register);
12965   %}
12966   ins_pipe(pipe_class_default);
12967 %}
12968 
12969 instruct countTrailingZerosI_Ex(iRegIdst dst, iRegIsrc src) %{
12970   match(Set dst (CountTrailingZerosI src));
12971   predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
12972   ins_cost(DEFAULT_COST);
12973 
12974   expand %{
12975     immI16 imm1 %{ (int)-1 %}
12976     immI16 imm2 %{ (int)32 %}
12977     immI_minus1 m1 %{ -1 %}
12978     iRegIdst tmpI1;
12979     iRegIdst tmpI2;
12980     iRegIdst tmpI3;
12981     addI_reg_imm16(tmpI1, src, imm1);
12982     andcI_reg_reg(tmpI2, src, m1, tmpI1);
12983     countLeadingZerosI(tmpI3, tmpI2);
12984     subI_imm16_reg(dst, imm2, tmpI3);
12985   %}
12986 %}
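
// Illustration only: the expansion above relies on the classic identity
// ctz(x) == 32 - clz(~x & (x - 1)), since ~x & (x - 1) has ones exactly at the
// trailing-zero positions of x. As a C sketch (cntlzw() stands for the CNTLZW
// instruction, which returns 32 for a zero input):
//
//   int ctzI(unsigned x) {
//     unsigned low = ~x & (x - 1);   // ones exactly where x has trailing zeros
//     return 32 - cntlzw(low);       // ctzI(0) == 32 because cntlzw(0) == 32
//   }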
12987 
12988 instruct countTrailingZerosI_cnttzw(iRegIdst dst, iRegIsrc src) %{
12989   match(Set dst (CountTrailingZerosI src));
12990   predicate(UseCountTrailingZerosInstructionsPPC64);
12991   ins_cost(DEFAULT_COST);
12992 
12993   format %{ "CNTTZW  $dst, $src" %}
12994   size(4);
12995   ins_encode %{
12996     __ cnttzw($dst$$Register, $src$$Register);
12997   %}
12998   ins_pipe(pipe_class_default);
12999 %}
13000 
13001 instruct countTrailingZerosL_Ex(iRegIdst dst, iRegLsrc src) %{
13002   match(Set dst (CountTrailingZerosL src));
13003   predicate(UseCountLeadingZerosInstructionsPPC64 && !UseCountTrailingZerosInstructionsPPC64);
13004   ins_cost(DEFAULT_COST);
13005 
13006   expand %{
13007     immL16 imm1 %{ (long)-1 %}
13008     immI16 imm2 %{ (int)64 %}
13009     iRegLdst tmpL1;
13010     iRegLdst tmpL2;
13011     iRegIdst tmpL3;
13012     addL_reg_imm16(tmpL1, src, imm1);
13013     andcL_reg_reg(tmpL2, tmpL1, src);
13014     countLeadingZerosL(tmpL3, tmpL2);
13015     subI_imm16_reg(dst, imm2, tmpL3);
13016  %}
13017 %}
13018 
13019 instruct countTrailingZerosL_cnttzd(iRegIdst dst, iRegLsrc src) %{
13020   match(Set dst (CountTrailingZerosL src));
13021   predicate(UseCountTrailingZerosInstructionsPPC64);
13022   ins_cost(DEFAULT_COST);
13023 
13024   format %{ "CNTTZD  $dst, $src" %}
13025   size(4);
13026   ins_encode %{
13027     __ cnttzd($dst$$Register, $src$$Register);
13028   %}
13029   ins_pipe(pipe_class_default);
13030 %}
13031 
13032 // Expand nodes for byte_reverse_int.
13033 instruct insrwi_a(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
13034   effect(DEF dst, USE src, USE pos, USE shift);
13035   predicate(false);
13036 
13037   format %{ "INSRWI  $dst, $src, $pos, $shift" %}
13038   size(4);
13039   ins_encode %{
13040     __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
13041   %}
13042   ins_pipe(pipe_class_default);
13043 %}
13044 
13045 // As insrwi_a, but with USE_DEF.
13046 instruct insrwi(iRegIdst dst, iRegIsrc src, immI16 pos, immI16 shift) %{
13047   effect(USE_DEF dst, USE src, USE pos, USE shift);
13048   predicate(false);
13049 
13050   format %{ "INSRWI  $dst, $src, $pos, $shift" %}
13051   size(4);
13052   ins_encode %{
13053     __ insrwi($dst$$Register, $src$$Register, $shift$$constant, $pos$$constant);
13054   %}
13055   ins_pipe(pipe_class_default);
13056 %}
13057 
13058 // Just slightly faster than the Java implementation.
13059 instruct bytes_reverse_int_Ex(iRegIdst dst, iRegIsrc src) %{
13060   match(Set dst (ReverseBytesI src));
13061   predicate(!UseByteReverseInstructions);
13062   ins_cost(7*DEFAULT_COST);
13063 
13064   expand %{
13065     immI16 imm24 %{ (int) 24 %}
13066     immI16 imm16 %{ (int) 16 %}
13067     immI16  imm8 %{ (int)  8 %}
13068     immI16  imm4 %{ (int)  4 %}
13069     immI16  imm0 %{ (int)  0 %}
13070     iRegLdst tmpI1;
13071     iRegLdst tmpI2;
13072     iRegLdst tmpI3;
13073 
13074     urShiftI_reg_imm(tmpI1, src, imm24);
13075     insrwi_a(dst, tmpI1, imm24, imm8);
13076     urShiftI_reg_imm(tmpI2, src, imm16);
13077     insrwi(dst, tmpI2, imm8, imm16);
13078     urShiftI_reg_imm(tmpI3, src, imm8);
13079     insrwi(dst, tmpI3, imm8, imm8);
13080     insrwi(dst, src, imm0, imm8);
13081   %}
13082 %}
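
// Illustration only: the net effect of the shift/insert sequence above is the
// usual 32-bit byte swap, e.g. in C:
//
//   unsigned bswap32(unsigned x) {
//     return  (x >> 24)
//          | ((x >>  8) & 0x0000FF00u)
//          | ((x <<  8) & 0x00FF0000u)
//          |  (x << 24);
//   }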
13083 
13084 instruct bytes_reverse_int_vec(iRegIdst dst, iRegIsrc src, vecX tmpV) %{
13085   match(Set dst (ReverseBytesI src));
13086   predicate(UseVectorByteReverseInstructionsPPC64);
13087   effect(TEMP tmpV);
13088   ins_cost(DEFAULT_COST*3);
13089   size(12);
13090   format %{ "MTVSRWZ $tmpV, $src\n"
13091             "\tXXBRW   $tmpV, $tmpV\n"
13092             "\tMFVSRWZ $dst, $tmpV" %}
13093 
13094   ins_encode %{
13095     __ mtvsrwz($tmpV$$VectorSRegister, $src$$Register);
13096     __ xxbrw($tmpV$$VectorSRegister, $tmpV$$VectorSRegister);
13097     __ mfvsrwz($dst$$Register, $tmpV$$VectorSRegister);
13098   %}
13099   ins_pipe(pipe_class_default);
13100 %}
13101 
13102 instruct bytes_reverse_int(iRegIdst dst, iRegIsrc src) %{
13103   match(Set dst (ReverseBytesI src));
13104   predicate(UseByteReverseInstructions);
13105   ins_cost(DEFAULT_COST);
13106   size(4);
13107 
13108   format %{ "BRW  $dst, $src" %}
13109 
13110   ins_encode %{
13111     __ brw($dst$$Register, $src$$Register);
13112   %}
13113   ins_pipe(pipe_class_default);
13114 %}
13115 
13116 instruct bytes_reverse_long_Ex(iRegLdst dst, iRegLsrc src) %{
13117   match(Set dst (ReverseBytesL src));
13118   predicate(!UseByteReverseInstructions);
13119   ins_cost(15*DEFAULT_COST);
13120 
13121   expand %{
13122     immI16 imm56 %{ (int) 56 %}
13123     immI16 imm48 %{ (int) 48 %}
13124     immI16 imm40 %{ (int) 40 %}
13125     immI16 imm32 %{ (int) 32 %}
13126     immI16 imm24 %{ (int) 24 %}
13127     immI16 imm16 %{ (int) 16 %}
13128     immI16  imm8 %{ (int)  8 %}
13129     immI16  imm0 %{ (int)  0 %}
13130     iRegLdst tmpL1;
13131     iRegLdst tmpL2;
13132     iRegLdst tmpL3;
13133     iRegLdst tmpL4;
13134     iRegLdst tmpL5;
13135     iRegLdst tmpL6;
13136 
13137                                         // src   : |a|b|c|d|e|f|g|h|
13138     rldicl(tmpL1, src, imm8, imm24);    // tmpL1 : | | | |e|f|g|h|a|
13139     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |a| | | |e|
13140     rldicl(tmpL3, tmpL2, imm32, imm0);  // tmpL3 : | | | |e| | | |a|
13141     rldicl(tmpL1, src, imm16, imm24);   // tmpL1 : | | | |f|g|h|a|b|
13142     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |b| | | |f|
13143     rldicl(tmpL4, tmpL2, imm40, imm0);  // tmpL4 : | | |f| | | |b| |
13144     orL_reg_reg(tmpL5, tmpL3, tmpL4);   // tmpL5 : | | |f|e| | |b|a|
13145     rldicl(tmpL1, src, imm24, imm24);   // tmpL1 : | | | |g|h|a|b|c|
13146     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |c| | | |g|
13147     rldicl(tmpL3, tmpL2, imm48, imm0);  // tmpL3 : | |g| | | |c| | |
13148     rldicl(tmpL1, src, imm32, imm24);   // tmpL1 : | | | |h|a|b|c|d|
13149     rldicl(tmpL2, tmpL1, imm32, imm24); // tmpL2 : | | | |d| | | |h|
13150     rldicl(tmpL4, tmpL2, imm56, imm0);  // tmpL4 : |h| | | |d| | | |
13151     orL_reg_reg(tmpL6, tmpL3, tmpL4);   // tmpL6 : |h|g| | |d|c| | |
13152     orL_reg_reg(dst, tmpL5, tmpL6);     // dst   : |h|g|f|e|d|c|b|a|
13153   %}
13154 %}
13155 
13156 instruct bytes_reverse_long_vec(iRegLdst dst, iRegLsrc src, vecX tmpV) %{
13157   match(Set dst (ReverseBytesL src));
13158   predicate(UseVectorByteReverseInstructionsPPC64);
13159   effect(TEMP tmpV);
13160   ins_cost(DEFAULT_COST*3);
13161   size(12);
13162   format %{ "MTVSRD  $tmpV, $src\n"
13163             "\tXXBRD   $tmpV, $tmpV\n"
13164             "\tMFVSRD  $dst, $tmpV" %}
13165 
13166   ins_encode %{
13167     __ mtvsrd($tmpV$$VectorSRegister, $src$$Register);
13168     __ xxbrd($tmpV$$VectorSRegister, $tmpV$$VectorSRegister);
13169     __ mfvsrd($dst$$Register, $tmpV$$VectorSRegister);
13170   %}
13171   ins_pipe(pipe_class_default);
13172 %}
13173 
13174 instruct bytes_reverse_long(iRegLdst dst, iRegLsrc src) %{
13175   match(Set dst (ReverseBytesL src));
13176   predicate(UseByteReverseInstructions);
13177   ins_cost(DEFAULT_COST);
13178   size(4);
13179 
13180   format %{ "BRD  $dst, $src" %}
13181 
13182   ins_encode %{
13183     __ brd($dst$$Register, $src$$Register);
13184   %}
13185   ins_pipe(pipe_class_default);
13186 %}
13187 
13188 instruct bytes_reverse_ushort_Ex(iRegIdst dst, iRegIsrc src) %{
13189   match(Set dst (ReverseBytesUS src));
13190   predicate(!UseByteReverseInstructions);
13191   ins_cost(2*DEFAULT_COST);
13192 
13193   expand %{
13194     immI16  imm16 %{ (int) 16 %}
13195     immI16   imm8 %{ (int)  8 %}
13196 
13197     urShiftI_reg_imm(dst, src, imm8);
13198     insrwi(dst, src, imm16, imm8);
13199   %}
13200 %}
13201 
13202 instruct bytes_reverse_ushort(iRegIdst dst, iRegIsrc src) %{
13203   match(Set dst (ReverseBytesUS src));
13204   predicate(UseByteReverseInstructions);
13205   ins_cost(DEFAULT_COST);
13206   size(4);
13207 
13208   format %{ "BRH  $dst, $src" %}
13209 
13210   ins_encode %{
13211     __ brh($dst$$Register, $src$$Register);
13212   %}
13213   ins_pipe(pipe_class_default);
13214 %}
13215 
13216 instruct bytes_reverse_short_Ex(iRegIdst dst, iRegIsrc src) %{
13217   match(Set dst (ReverseBytesS src));
13218   predicate(!UseByteReverseInstructions);
13219   ins_cost(3*DEFAULT_COST);
13220 
13221   expand %{
13222     immI16  imm16 %{ (int) 16 %}
13223     immI16   imm8 %{ (int)  8 %}
13224     iRegLdst tmpI1;
13225 
13226     urShiftI_reg_imm(tmpI1, src, imm8);
13227     insrwi(tmpI1, src, imm16, imm8);
13228     extsh(dst, tmpI1);
13229   %}
13230 %}
13231 
13232 instruct bytes_reverse_short(iRegIdst dst, iRegIsrc src) %{
13233   match(Set dst (ReverseBytesS src));
13234   predicate(UseByteReverseInstructions);
13235   ins_cost(DEFAULT_COST);
13236   size(8);
13237 
13238   format %{ "BRH   $dst, $src\n\t"
13239             "EXTSH $dst, $dst" %}
13240 
13241   ins_encode %{
13242     __ brh($dst$$Register, $src$$Register);
13243     __ extsh($dst$$Register, $dst$$Register);
13244   %}
13245   ins_pipe(pipe_class_default);
13246 %}
13247 
13248 // Load Integer reversed byte order
13249 instruct loadI_reversed(iRegIdst dst, indirect mem) %{
13250   match(Set dst (ReverseBytesI (LoadI mem)));
13251   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13252   ins_cost(MEMORY_REF_COST);
13253 
13254   size(4);
13255   ins_encode %{
13256     __ lwbrx($dst$$Register, $mem$$Register);
13257   %}
13258   ins_pipe(pipe_class_default);
13259 %}
13260 
13261 instruct loadI_reversed_acquire(iRegIdst dst, indirect mem) %{
13262   match(Set dst (ReverseBytesI (LoadI mem)));
13263   ins_cost(2 * MEMORY_REF_COST);
13264 
13265   size(12);
13266   ins_encode %{
13267     __ lwbrx($dst$$Register, $mem$$Register);
13268     __ twi_0($dst$$Register);
13269     __ isync();
13270   %}
13271   ins_pipe(pipe_class_default);
13272 %}
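
// Note: the TWI/ISYNC pair after the byte-reversed load is the usual PowerPC
// load-acquire idiom. The (never-trapping) twi consumes the loaded value and
// the following isync keeps later instructions from starting before the load
// has completed; the other *_acquire variants below use the same pattern.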
13273 
13274 // Load Long - aligned and reversed
13275 instruct loadL_reversed(iRegLdst dst, indirect mem) %{
13276   match(Set dst (ReverseBytesL (LoadL mem)));
13277   predicate(VM_Version::has_ldbrx() && (n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1))));
13278   ins_cost(MEMORY_REF_COST);
13279 
13280   size(4);
13281   ins_encode %{
13282     __ ldbrx($dst$$Register, $mem$$Register);
13283   %}
13284   ins_pipe(pipe_class_default);
13285 %}
13286 
13287 instruct loadL_reversed_acquire(iRegLdst dst, indirect mem) %{
13288   match(Set dst (ReverseBytesL (LoadL mem)));
13289   predicate(VM_Version::has_ldbrx());
13290   ins_cost(2 * MEMORY_REF_COST);
13291 
13292   size(12);
13293   ins_encode %{
13294     __ ldbrx($dst$$Register, $mem$$Register);
13295     __ twi_0($dst$$Register);
13296     __ isync();
13297   %}
13298   ins_pipe(pipe_class_default);
13299 %}
13300 
13301 // Load unsigned short / char reversed byte order
13302 instruct loadUS_reversed(iRegIdst dst, indirect mem) %{
13303   match(Set dst (ReverseBytesUS (LoadUS mem)));
13304   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13305   ins_cost(MEMORY_REF_COST);
13306 
13307   size(4);
13308   ins_encode %{
13309     __ lhbrx($dst$$Register, $mem$$Register);
13310   %}
13311   ins_pipe(pipe_class_default);
13312 %}
13313 
13314 instruct loadUS_reversed_acquire(iRegIdst dst, indirect mem) %{
13315   match(Set dst (ReverseBytesUS (LoadUS mem)));
13316   ins_cost(2 * MEMORY_REF_COST);
13317 
13318   size(12);
13319   ins_encode %{
13320     __ lhbrx($dst$$Register, $mem$$Register);
13321     __ twi_0($dst$$Register);
13322     __ isync();
13323   %}
13324   ins_pipe(pipe_class_default);
13325 %}
13326 
13327 // Load short reversed byte order
13328 instruct loadS_reversed(iRegIdst dst, indirect mem) %{
13329   match(Set dst (ReverseBytesS (LoadS mem)));
13330   predicate(n->in(1)->as_Load()->is_unordered() || followed_by_acquire(n->in(1)));
13331   ins_cost(MEMORY_REF_COST + DEFAULT_COST);
13332 
13333   size(8);
13334   ins_encode %{
13335     __ lhbrx($dst$$Register, $mem$$Register);
13336     __ extsh($dst$$Register, $dst$$Register);
13337   %}
13338   ins_pipe(pipe_class_default);
13339 %}
13340 
13341 instruct loadS_reversed_acquire(iRegIdst dst, indirect mem) %{
13342   match(Set dst (ReverseBytesS (LoadS mem)));
13343   ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
13344 
13345   size(16);
13346   ins_encode %{
13347     __ lhbrx($dst$$Register, $mem$$Register);
13348     __ twi_0($dst$$Register);
13349     __ extsh($dst$$Register, $dst$$Register);
13350     __ isync();
13351   %}
13352   ins_pipe(pipe_class_default);
13353 %}
13354 
13355 // Store Integer reversed byte order
13356 instruct storeI_reversed(iRegIsrc src, indirect mem) %{
13357   match(Set mem (StoreI mem (ReverseBytesI src)));
13358   ins_cost(MEMORY_REF_COST);
13359 
13360   size(4);
13361   ins_encode %{
13362     __ stwbrx($src$$Register, $mem$$Register);
13363   %}
13364   ins_pipe(pipe_class_default);
13365 %}
13366 
13367 // Store Long reversed byte order
13368 instruct storeL_reversed(iRegLsrc src, indirect mem) %{
13369   match(Set mem (StoreL mem (ReverseBytesL src)));
13370   predicate(VM_Version::has_stdbrx());
13371   ins_cost(MEMORY_REF_COST);
13372 
13373   size(4);
13374   ins_encode %{
13375     __ stdbrx($src$$Register, $mem$$Register);
13376   %}
13377   ins_pipe(pipe_class_default);
13378 %}
13379 
13380 // Store unsigned short / char reversed byte order
13381 instruct storeUS_reversed(iRegIsrc src, indirect mem) %{
13382   match(Set mem (StoreC mem (ReverseBytesUS src)));
13383   ins_cost(MEMORY_REF_COST);
13384 
13385   size(4);
13386   ins_encode %{
13387     __ sthbrx($src$$Register, $mem$$Register);
13388   %}
13389   ins_pipe(pipe_class_default);
13390 %}
13391 
13392 // Store short reversed byte order
13393 instruct storeS_reversed(iRegIsrc src, indirect mem) %{
13394   match(Set mem (StoreC mem (ReverseBytesS src)));
13395   ins_cost(MEMORY_REF_COST);
13396 
13397   size(4);
13398   ins_encode %{
13399     __ sthbrx($src$$Register, $mem$$Register);
13400   %}
13401   ins_pipe(pipe_class_default);
13402 %}
13403 
13404 instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
13405   effect(DEF temp1, USE src);
13406 
13407   format %{ "MTVSRWZ $temp1, $src \t// Move to 16-byte register" %}
13408   size(4);
13409   ins_encode %{
13410     __ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
13411   %}
13412   ins_pipe(pipe_class_default);
13413 %}
13414 
13415 instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
13416   effect(DEF dst, USE src, USE imm1);
13417 
13418   format %{ "XXSPLTW $dst, $src, $imm1 \t// Splat word" %}
13419   size(4);
13420   ins_encode %{
13421     __ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
13422   %}
13423   ins_pipe(pipe_class_default);
13424 %}
13425 
13426 instruct xscvdpspn_regF(vecX dst, regF src) %{
13427   effect(DEF dst, USE src);
13428 
13429   format %{ "XSCVDPSPN $dst, $src \t// Convert scalar single precision to vector single precision" %}
13430   size(4);
13431   ins_encode %{
13432     __ xscvdpspn($dst$$VectorSRegister, $src$$FloatRegister->to_vsr());
13433   %}
13434   ins_pipe(pipe_class_default);
13435 %}
13436 
13437 //---------- Replicate Vector Instructions ------------------------------------
13438 
13439 // Insrdi does replicate if src == dst.
13440 instruct repl32(iRegLdst dst) %{
13441   predicate(false);
13442   effect(USE_DEF dst);
13443 
13444   format %{ "INSRDI  $dst, #0, $dst, #32 \t// replicate" %}
13445   size(4);
13446   ins_encode %{
13447     __ insrdi($dst$$Register, $dst$$Register, 32, 0);
13448   %}
13449   ins_pipe(pipe_class_default);
13450 %}
13451 
13452 // Insrdi does replicate if src == dst.
13453 instruct repl48(iRegLdst dst) %{
13454   predicate(false);
13455   effect(USE_DEF dst);
13456 
13457   format %{ "INSRDI  $dst, #0, $dst, #48 \t// replicate" %}
13458   size(4);
13459   ins_encode %{
13460     __ insrdi($dst$$Register, $dst$$Register, 48, 0);
13461   %}
13462   ins_pipe(pipe_class_default);
13463 %}
13464 
13465 // Insrdi does replicate if src == dst.
13466 instruct repl56(iRegLdst dst) %{
13467   predicate(false);
13468   effect(USE_DEF dst);
13469 
13470   format %{ "INSRDI  $dst, #0, $dst, #56 \t// replicate" %}
13471   size(4);
13472   ins_encode %{
13473     __ insrdi($dst$$Register, $dst$$Register, 56, 0);
13474   %}
13475   ins_pipe(pipe_class_default);
13476 %}
13477 
13478 instruct repl8B_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13479   match(Set dst (ReplicateB src));
13480   predicate(n->as_Vector()->length() == 8);
13481   expand %{
13482     moveReg(dst, src);
13483     repl56(dst);
13484     repl48(dst);
13485     repl32(dst);
13486   %}
13487 %}
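
// Illustration only: with the source byte B in the low 8 bits, each INSRDI
// step above doubles the replicated width:
//   after repl56: the low 16 bits hold B B
//   after repl48: the low 32 bits hold B B B B
//   after repl32: all 64 bits hold    B B B B B B B B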
13488 
13489 instruct repl8B_immI0(iRegLdst dst, immI_0 zero) %{
13490   match(Set dst (ReplicateB zero));
13491   predicate(n->as_Vector()->length() == 8);
13492   format %{ "LI      $dst, #0 \t// replicate8B" %}
13493   size(4);
13494   ins_encode %{
13495     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13496   %}
13497   ins_pipe(pipe_class_default);
13498 %}
13499 
13500 instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
13501   match(Set dst (ReplicateB src));
13502   predicate(n->as_Vector()->length() == 8);
13503   format %{ "LI      $dst, #-1 \t// replicate8B" %}
13504   size(4);
13505   ins_encode %{
13506     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13507   %}
13508   ins_pipe(pipe_class_default);
13509 %}
13510 
13511 instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
13512   match(Set dst (ReplicateB src));
13513   predicate(n->as_Vector()->length() == 16);
13514 
13515   expand %{
13516     iRegLdst tmpL;
13517     vecX tmpV;
13518     immI8  imm1 %{ (int)  1 %}
13519     moveReg(tmpL, src);
13520     repl56(tmpL);
13521     repl48(tmpL);
13522     mtvsrwz(tmpV, tmpL);
13523     xxspltw(dst, tmpV, imm1);
13524   %}
13525 %}
13526 
13527 instruct repl16B_immI0(vecX dst, immI_0 zero) %{
13528   match(Set dst (ReplicateB zero));
13529   predicate(n->as_Vector()->length() == 16);
13530 
13531   format %{ "XXLXOR      $dst, $zero \t// replicate16B" %}
13532   size(4);
13533   ins_encode %{
13534     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13535   %}
13536   ins_pipe(pipe_class_default);
13537 %}
13538 
13539 instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
13540   match(Set dst (ReplicateB src));
13541   predicate(n->as_Vector()->length() == 16);
13542 
13543   format %{ "XXLEQV      $dst, $src \t// replicate16B" %}
13544   size(4);
13545   ins_encode %{
13546     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13547   %}
13548   ins_pipe(pipe_class_default);
13549 %}
13550 
13551 instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13552   match(Set dst (ReplicateS src));
13553   predicate(n->as_Vector()->length() == 4);
13554   expand %{
13555     moveReg(dst, src);
13556     repl48(dst);
13557     repl32(dst);
13558   %}
13559 %}
13560 
13561 instruct repl4S_immI0(iRegLdst dst, immI_0 zero) %{
13562   match(Set dst (ReplicateS zero));
13563   predicate(n->as_Vector()->length() == 4);
13564   format %{ "LI      $dst, #0 \t// replicate4S" %}
13565   size(4);
13566   ins_encode %{
13567     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13568   %}
13569   ins_pipe(pipe_class_default);
13570 %}
13571 
13572 instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
13573   match(Set dst (ReplicateS src));
13574   predicate(n->as_Vector()->length() == 4);
13575   format %{ "LI      $dst, -1 \t// replicate4S" %}
13576   size(4);
13577   ins_encode %{
13578     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13579   %}
13580   ins_pipe(pipe_class_default);
13581 %}
13582 
13583 instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
13584   match(Set dst (ReplicateS src));
13585   predicate(n->as_Vector()->length() == 8);
13586 
13587   expand %{
13588     iRegLdst tmpL;
13589     vecX tmpV;
13590     immI8  zero %{ (int)  0 %}
13591     moveReg(tmpL, src);
13592     repl48(tmpL);
13593     repl32(tmpL);
13594     mtvsrd(tmpV, tmpL);
13595     xxpermdi(dst, tmpV, tmpV, zero);
13596   %}
13597 %}
13598 
13599 instruct repl8S_immI0(vecX dst, immI_0 zero) %{
13600   match(Set dst (ReplicateS zero));
13601   predicate(n->as_Vector()->length() == 8);
13602 
13603   format %{ "XXLXOR      $dst, $zero \t// replicate8S" %}
13604   size(4);
13605   ins_encode %{
13606     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13607   %}
13608   ins_pipe(pipe_class_default);
13609 %}
13610 
13611 instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
13612   match(Set dst (ReplicateS src));
13613   predicate(n->as_Vector()->length() == 8);
13614 
13615   format %{ "XXLEQV      $dst, $src \t// replicate8S" %}
13616   size(4);
13617   ins_encode %{
13618     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13619   %}
13620   ins_pipe(pipe_class_default);
13621 %}
13622 
13623 instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
13624   match(Set dst (ReplicateI src));
13625   predicate(n->as_Vector()->length() == 2);
13626   ins_cost(2 * DEFAULT_COST);
13627   expand %{
13628     moveReg(dst, src);
13629     repl32(dst);
13630   %}
13631 %}
13632 
13633 instruct repl2I_immI0(iRegLdst dst, immI_0 zero) %{
13634   match(Set dst (ReplicateI zero));
13635   predicate(n->as_Vector()->length() == 2);
13636   format %{ "LI      $dst, #0 \t// replicate2I" %}
13637   size(4);
13638   ins_encode %{
13639     __ li($dst$$Register, (int)((short)($zero$$constant & 0xFFFF)));
13640   %}
13641   ins_pipe(pipe_class_default);
13642 %}
13643 
13644 instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
13645   match(Set dst (ReplicateI src));
13646   predicate(n->as_Vector()->length() == 2);
13647   format %{ "LI      $dst, -1 \t// replicate2I" %}
13648   size(4);
13649   ins_encode %{
13650     __ li($dst$$Register, (int)((short)($src$$constant & 0xFFFF)));
13651   %}
13652   ins_pipe(pipe_class_default);
13653 %}
13654 
13655 instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
13656   match(Set dst (ReplicateI src));
13657   predicate(n->as_Vector()->length() == 4);
13658   ins_cost(2 * DEFAULT_COST);
13659 
13660   expand %{
13661     iRegLdst tmpL;
13662     vecX tmpV;
13663     immI8  zero %{ (int)  0 %}
13664     moveReg(tmpL, src);
13665     repl32(tmpL);
13666     mtvsrd(tmpV, tmpL);
13667     xxpermdi(dst, tmpV, tmpV, zero);
13668   %}
13669 %}
13670 
13671 instruct repl4I_immI0(vecX dst, immI_0 zero) %{
13672   match(Set dst (ReplicateI zero));
13673   predicate(n->as_Vector()->length() == 4);
13674 
13675   format %{ "XXLXOR      $dst, $zero \t// replicate4I" %}
13676   size(4);
13677   ins_encode %{
13678     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13679   %}
13680   ins_pipe(pipe_class_default);
13681 %}
13682 
13683 instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
13684   match(Set dst (ReplicateI src));
13685   predicate(n->as_Vector()->length() == 4);
13686 
13687   format %{ "XXLEQV      $dst, $dst, $dst \t// replicate4I" %}
13688   size(4);
13689   ins_encode %{
13690     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
13691   %}
13692   ins_pipe(pipe_class_default);
13693 %}
13694 
13695 // Move float to int register via stack, replicate.
13696 instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
13697   match(Set dst (ReplicateF src));
13698   predicate(n->as_Vector()->length() == 2);
13699   ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
13700   expand %{
13701     stackSlotL tmpS;
13702     iRegIdst tmpI;
13703     moveF2I_reg_stack(tmpS, src);   // Move float to stack.
13704     moveF2I_stack_reg(tmpI, tmpS);  // Move stack to int reg.
13705     moveReg(dst, tmpI);             // Move int to long reg.
13706     repl32(dst);                    // Replicate bitpattern.
13707   %}
13708 %}
13709 
13710 // Replicate scalar constant to packed float values in Double register
13711 instruct repl2F_immF_Ex(iRegLdst dst, immF src) %{
13712   match(Set dst (ReplicateF src));
13713   predicate(n->as_Vector()->length() == 2);
13714   ins_cost(5 * DEFAULT_COST);
13715 
13716   format %{ "LD      $dst, offset, $constanttablebase\t// load replicated float $src $src from table, postalloc expanded" %}
13717   postalloc_expand( postalloc_expand_load_replF_constant(dst, src, constanttablebase) );
13718 %}
13719 
13720 // Replicate scalar zero constant to packed float values in Double register
13721 instruct repl2F_immF0(iRegLdst dst, immF_0 zero) %{
13722   match(Set dst (ReplicateF zero));
13723   predicate(n->as_Vector()->length() == 2);
13724 
13725   format %{ "LI      $dst, #0 \t// replicate2F" %}
13726   ins_encode %{
13727     __ li($dst$$Register, 0x0);
13728   %}
13729   ins_pipe(pipe_class_default);
13730 %}
13731 
13732 
13733 //----------Vector Arithmetic Instructions--------------------------------------
13734 
13735 // Vector Addition Instructions
13736 
13737 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
13738   match(Set dst (AddVB src1 src2));
13739   predicate(n->as_Vector()->length() == 16);
13740   format %{ "VADDUBM  $dst,$src1,$src2\t// add packed16B" %}
13741   size(4);
13742   ins_encode %{
13743     __ vaddubm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13744   %}
13745   ins_pipe(pipe_class_default);
13746 %}
13747 
13748 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
13749   match(Set dst (AddVS src1 src2));
13750   predicate(n->as_Vector()->length() == 8);
13751   format %{ "VADDUHM  $dst,$src1,$src2\t// add packed8S" %}
13752   size(4);
13753   ins_encode %{
13754     __ vadduhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13755   %}
13756   ins_pipe(pipe_class_default);
13757 %}
13758 
13759 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
13760   match(Set dst (AddVI src1 src2));
13761   predicate(n->as_Vector()->length() == 4);
13762   format %{ "VADDUWM  $dst,$src1,$src2\t// add packed4I" %}
13763   size(4);
13764   ins_encode %{
13765     __ vadduwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13766   %}
13767   ins_pipe(pipe_class_default);
13768 %}
13769 
13770 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
13771   match(Set dst (AddVF src1 src2));
13772   predicate(n->as_Vector()->length() == 4);
13773   format %{ "VADDFP  $dst,$src1,$src2\t// add packed4F" %}
13774   size(4);
13775   ins_encode %{
13776     __ vaddfp($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13777   %}
13778   ins_pipe(pipe_class_default);
13779 %}
13780 
13781 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
13782   match(Set dst (AddVL src1 src2));
13783   predicate(n->as_Vector()->length() == 2);
13784   format %{ "VADDUDM  $dst,$src1,$src2\t// add packed2L" %}
13785   size(4);
13786   ins_encode %{
13787     __ vaddudm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13788   %}
13789   ins_pipe(pipe_class_default);
13790 %}
13791 
13792 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
13793   match(Set dst (AddVD src1 src2));
13794   predicate(n->as_Vector()->length() == 2);
13795   format %{ "XVADDDP  $dst,$src1,$src2\t// add packed2D" %}
13796   size(4);
13797   ins_encode %{
13798     __ xvadddp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13799   %}
13800   ins_pipe(pipe_class_default);
13801 %}
13802 
13803 // Vector Subtraction Instructions
13804 
13805 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
13806   match(Set dst (SubVB src1 src2));
13807   predicate(n->as_Vector()->length() == 16);
13808   format %{ "VSUBUBM  $dst,$src1,$src2\t// sub packed16B" %}
13809   size(4);
13810   ins_encode %{
13811     __ vsububm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13812   %}
13813   ins_pipe(pipe_class_default);
13814 %}
13815 
13816 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
13817   match(Set dst (SubVS src1 src2));
13818   predicate(n->as_Vector()->length() == 8);
13819   format %{ "VSUBUHM  $dst,$src1,$src2\t// sub packed8S" %}
13820   size(4);
13821   ins_encode %{
13822     __ vsubuhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13823   %}
13824   ins_pipe(pipe_class_default);
13825 %}
13826 
13827 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
13828   match(Set dst (SubVI src1 src2));
13829   predicate(n->as_Vector()->length() == 4);
13830   format %{ "VSUBUWM  $dst,$src1,$src2\t// sub packed4I" %}
13831   size(4);
13832   ins_encode %{
13833     __ vsubuwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13834   %}
13835   ins_pipe(pipe_class_default);
13836 %}
13837 
13838 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
13839   match(Set dst (SubVF src1 src2));
13840   predicate(n->as_Vector()->length() == 4);
13841   format %{ "VSUBFP  $dst,$src1,$src2\t// sub packed4F" %}
13842   size(4);
13843   ins_encode %{
13844     __ vsubfp($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13845   %}
13846   ins_pipe(pipe_class_default);
13847 %}
13848 
13849 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
13850   match(Set dst (SubVL src1 src2));
13851   predicate(n->as_Vector()->length() == 2);
13852   format %{ "VSUBUDM  $dst,$src1,$src2\t// sub packed2L" %}
13853   size(4);
13854   ins_encode %{
13855     __ vsubudm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13856   %}
13857   ins_pipe(pipe_class_default);
13858 %}
13859 
13860 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
13861   match(Set dst (SubVD src1 src2));
13862   predicate(n->as_Vector()->length() == 2);
13863   format %{ "XVSUBDP  $dst,$src1,$src2\t// sub packed2D" %}
13864   size(4);
13865   ins_encode %{
13866     __ xvsubdp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13867   %}
13868   ins_pipe(pipe_class_default);
13869 %}
13870 
13871 // Vector Multiplication Instructions
13872 
13873 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2, vecX tmp) %{
13874   match(Set dst (MulVS src1 src2));
13875   predicate(n->as_Vector()->length() == 8);
13876   effect(TEMP tmp);
13877   format %{ "VSPLTISH   $tmp,0\t// mul packed8S\n\t"
13878             "VMLADDUHM  $dst,$src1,$src2,$tmp\t// mul packed8S" %}
13879   size(8);
13880   ins_encode %{
13881     __ vspltish($tmp$$VectorSRegister->to_vr(), 0);
13882     __ vmladduhm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr(), $tmp$$VectorSRegister->to_vr());
13883   %}
13884   ins_pipe(pipe_class_default);
13885 %}
13886 
13887 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
13888   match(Set dst (MulVI src1 src2));
13889   predicate(n->as_Vector()->length() == 4);
13890   format %{ "VMULUWM  $dst,$src1,$src2\t// mul packed4I" %}
13891   size(4);
13892   ins_encode %{
13893     __ vmuluwm($dst$$VectorSRegister->to_vr(), $src1$$VectorSRegister->to_vr(), $src2$$VectorSRegister->to_vr());
13894   %}
13895   ins_pipe(pipe_class_default);
13896 %}
13897 
13898 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
13899   match(Set dst (MulVF src1 src2));
13900   predicate(n->as_Vector()->length() == 4);
13901   format %{ "XVMULSP  $dst,$src1,$src2\t// mul packed4F" %}
13902   size(4);
13903   ins_encode %{
13904     __ xvmulsp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13905   %}
13906   ins_pipe(pipe_class_default);
13907 %}
13908 
13909 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
13910   match(Set dst (MulVD src1 src2));
13911   predicate(n->as_Vector()->length() == 2);
13912   format %{ "XVMULDP  $dst,$src1,$src2\t// mul packed2D" %}
13913   size(4);
13914   ins_encode %{
13915     __ xvmuldp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13916   %}
13917   ins_pipe(pipe_class_default);
13918 %}
13919 
13920 // Vector Division Instructions
13921 
13922 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
13923   match(Set dst (DivVF src1 src2));
13924   predicate(n->as_Vector()->length() == 4);
13925   format %{ "XVDIVSP  $dst,$src1,$src2\t// div packed4F" %}
13926   size(4);
13927   ins_encode %{
13928     __ xvdivsp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13929   %}
13930   ins_pipe(pipe_class_default);
13931 %}
13932 
13933 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
13934   match(Set dst (DivVD src1 src2));
13935   predicate(n->as_Vector()->length() == 2);
13936   format %{ "XVDIVDP  $dst,$src1,$src2\t// div packed2D" %}
13937   size(4);
13938   ins_encode %{
13939     __ xvdivdp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
13940   %}
13941   ins_pipe(pipe_class_default);
13942 %}
13943 
13944 // Vector Absolute Instructions
13945 
13946 instruct vabs4F_reg(vecX dst, vecX src) %{
13947   match(Set dst (AbsVF src));
13948   predicate(n->as_Vector()->length() == 4);
13949   format %{ "XVABSSP $dst,$src\t// absolute packed4F" %}
13950   size(4);
13951   ins_encode %{
13952     __ xvabssp($dst$$VectorSRegister, $src$$VectorSRegister);
13953   %}
13954   ins_pipe(pipe_class_default);
13955 %}
13956 
13957 instruct vabs2D_reg(vecX dst, vecX src) %{
13958   match(Set dst (AbsVD src));
13959   predicate(n->as_Vector()->length() == 2);
13960   format %{ "XVABSDP $dst,$src\t// absolute packed2D" %}
13961   size(4);
13962   ins_encode %{
13963     __ xvabsdp($dst$$VectorSRegister, $src$$VectorSRegister);
13964   %}
13965   ins_pipe(pipe_class_default);
13966 %}
13967 
13968 // Round Instructions
13969 instruct roundD_reg(regD dst, regD src, immI8 rmode) %{
13970   match(Set dst (RoundDoubleMode src rmode));
13971   format %{ "RoundDoubleMode $src,$rmode" %}
13972   size(4);
13973   ins_encode %{
13974     switch ($rmode$$constant) {
13975       case RoundDoubleModeNode::rmode_rint:
13976         __ xvrdpic($dst$$FloatRegister->to_vsr(), $src$$FloatRegister->to_vsr());
13977         break;
13978       case RoundDoubleModeNode::rmode_floor:
13979         __ frim($dst$$FloatRegister, $src$$FloatRegister);
13980         break;
13981       case RoundDoubleModeNode::rmode_ceil:
13982         __ frip($dst$$FloatRegister, $src$$FloatRegister);
13983         break;
13984       default:
13985         ShouldNotReachHere();
13986     }
13987   %}
13988   ins_pipe(pipe_class_default);
13989 %}
13990 
13991 // Vector Round Instructions
13992 instruct vround2D_reg(vecX dst, vecX src, immI8 rmode) %{
13993   match(Set dst (RoundDoubleModeV src rmode));
13994   predicate(n->as_Vector()->length() == 2);
13995   format %{ "RoundDoubleModeV $src,$rmode" %}
13996   size(4);
13997   ins_encode %{
13998     switch ($rmode$$constant) {
13999       case RoundDoubleModeNode::rmode_rint:
14000         __ xvrdpic($dst$$VectorSRegister, $src$$VectorSRegister);
14001         break;
14002       case RoundDoubleModeNode::rmode_floor:
14003         __ xvrdpim($dst$$VectorSRegister, $src$$VectorSRegister);
14004         break;
14005       case RoundDoubleModeNode::rmode_ceil:
14006         __ xvrdpip($dst$$VectorSRegister, $src$$VectorSRegister);
14007         break;
14008       default:
14009         ShouldNotReachHere();
14010     }
14011   %}
14012   ins_pipe(pipe_class_default);
14013 %}
14014 
14015 // Vector Negate Instructions
14016 
14017 instruct vneg4F_reg(vecX dst, vecX src) %{
14018   match(Set dst (NegVF src));
14019   predicate(n->as_Vector()->length() == 4);
14020   format %{ "XVNEGSP $dst,$src\t// negate packed4F" %}
14021   size(4);
14022   ins_encode %{
14023     __ xvnegsp($dst$$VectorSRegister, $src$$VectorSRegister);
14024   %}
14025   ins_pipe(pipe_class_default);
14026 %}
14027 
14028 instruct vneg2D_reg(vecX dst, vecX src) %{
14029   match(Set dst (NegVD src));
14030   predicate(n->as_Vector()->length() == 2);
14031   format %{ "XVNEGDP $dst,$src\t// negate packed2D" %}
14032   size(4);
14033   ins_encode %{
14034     __ xvnegdp($dst$$VectorSRegister, $src$$VectorSRegister);
14035   %}
14036   ins_pipe(pipe_class_default);
14037 %}
14038 
14039 // Vector Square Root Instructions
14040 
14041 instruct vsqrt4F_reg(vecX dst, vecX src) %{
14042   match(Set dst (SqrtVF src));
14043   predicate(n->as_Vector()->length() == 4);
14044   format %{ "XVSQRTSP $dst,$src\t// sqrt packed4F" %}
14045   size(4);
14046   ins_encode %{
14047     __ xvsqrtsp($dst$$VectorSRegister, $src$$VectorSRegister);
14048   %}
14049   ins_pipe(pipe_class_default);
14050 %}
14051 
14052 instruct vsqrt2D_reg(vecX dst, vecX src) %{
14053   match(Set dst (SqrtVD src));
14054   predicate(n->as_Vector()->length() == 2);
14055   format %{ "XVSQRTDP  $dst,$src\t// sqrt packed2D" %}
14056   size(4);
14057   ins_encode %{
14058     __ xvsqrtdp($dst$$VectorSRegister, $src$$VectorSRegister);
14059   %}
14060   ins_pipe(pipe_class_default);
14061 %}
14062 
14063 // Vector Population Count Instructions
14064 
14065 instruct vpopcnt4I_reg(vecX dst, vecX src) %{
14066   match(Set dst (PopCountVI src));
14067   predicate(n->as_Vector()->length() == 4);
14068   format %{ "VPOPCNTW $dst,$src\t// pop count packed4I" %}
14069   size(4);
14070   ins_encode %{
14071     __ vpopcntw($dst$$VectorSRegister->to_vr(), $src$$VectorSRegister->to_vr());
14072   %}
14073   ins_pipe(pipe_class_default);
14074 %}
14075 
14076 // --------------------------------- FMA --------------------------------------
14077 // dst + src1 * src2
14078 instruct vfma4F(vecX dst, vecX src1, vecX src2) %{
14079   match(Set dst (FmaVF dst (Binary src1 src2)));
14080   predicate(n->as_Vector()->length() == 4);
14081 
14082   format %{ "XVMADDASP   $dst, $src1, $src2" %}
14083 
14084   size(4);
14085   ins_encode %{
14086     __ xvmaddasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14087   %}
14088   ins_pipe(pipe_class_default);
14089 %}
14090 
14091 // dst - src1 * src2
14092 instruct vfma4F_neg1(vecX dst, vecX src1, vecX src2) %{
14093   match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
14094   match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
14095   predicate(n->as_Vector()->length() == 4);
14096 
14097   format %{ "XVNMSUBASP   $dst, $src1, $src2" %}
14098 
14099   size(4);
14100   ins_encode %{
14101     __ xvnmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14102   %}
14103   ins_pipe(pipe_class_default);
14104 %}
14105 
14106 // - dst + src1 * src2
14107 instruct vfma4F_neg2(vecX dst, vecX src1, vecX src2) %{
14108   match(Set dst (FmaVF (NegVF dst) (Binary src1 src2)));
14109   predicate(n->as_Vector()->length() == 4);
14110 
14111   format %{ "XVMSUBASP   $dst, $src1, $src2" %}
14112 
14113   size(4);
14114   ins_encode %{
14115     __ xvmsubasp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14116   %}
14117   ins_pipe(pipe_class_default);
14118 %}
14119 
14120 // dst + src1 * src2
14121 instruct vfma2D(vecX dst, vecX src1, vecX src2) %{
14122   match(Set dst (FmaVD  dst (Binary src1 src2)));
14123   predicate(n->as_Vector()->length() == 2);
14124 
14125   format %{ "XVMADDADP   $dst, $src1, $src2" %}
14126 
14127   size(4);
14128   ins_encode %{
14129     __ xvmaddadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14130   %}
14131   ins_pipe(pipe_class_default);
14132 %}
14133 
14134 // dst - src1 * src2
14135 instruct vfma2D_neg1(vecX dst, vecX src1, vecX src2) %{
14136   match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
14137   match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
14138   predicate(n->as_Vector()->length() == 2);
14139 
14140   format %{ "XVNMSUBADP   $dst, $src1, $src2" %}
14141 
14142   size(4);
14143   ins_encode %{
14144     __ xvnmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14145   %}
14146   ins_pipe(pipe_class_default);
14147 %}
14148 
14149 // - dst + src1 * src2
14150 instruct vfma2D_neg2(vecX dst, vecX src1, vecX src2) %{
14151   match(Set dst (FmaVD (NegVD dst) (Binary src1 src2)));
14152   predicate(n->as_Vector()->length() == 2);
14153 
14154   format %{ "XVMSUBADP   $dst, $src1, $src2" %}
14155 
14156   size(4);
14157   ins_encode %{
14158     __ xvmsubadp($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister);
14159   %}
14160   ins_pipe(pipe_class_default);
14161 %}
14162 
14163 //----------Overflow Math Instructions-----------------------------------------
14164 
14165 // Note that we have to make sure that XER.SO is reset before using overflow instructions.
14166 // Simple overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc).
14167 // It seems that only the Long intrinsics have an advantage. (The only expensive one is OverflowMulL.)
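//
// Illustration only: for addition the overflow condition could also be computed
// in software as
//   long sum = a + b;
//   bool ovf = ((a ^ sum) & (b ^ sum)) < 0;   // sign of sum differs from both inputs
// The nodes below let the hardware do it instead: clear XER.SO via mtxer, use
// the overflow-enabled record form ('o_'), and test the summary-overflow bit
// that the record form copies into CR0.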
14168 
14169 instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14170   match(Set cr0 (OverflowAddL op1 op2));
14171 
14172   format %{ "add_    $op1, $op2\t# overflow check long" %}
14173   ins_encode %{
14174     __ li(R0, 0);
14175     __ mtxer(R0); // clear XER.SO
14176     __ addo_(R0, $op1$$Register, $op2$$Register);
14177   %}
14178   ins_pipe(pipe_class_default);
14179 %}
14180 
14181 instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14182   match(Set cr0 (OverflowSubL op1 op2));
14183 
14184   format %{ "subfo_  R0, $op2, $op1\t# overflow check long" %}
14185   ins_encode %{
14186     __ li(R0, 0);
14187     __ mtxer(R0); // clear XER.SO
14188     __ subfo_(R0, $op2$$Register, $op1$$Register);
14189   %}
14190   ins_pipe(pipe_class_default);
14191 %}
14192 
14193 instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
14194   match(Set cr0 (OverflowSubL zero op2));
14195 
14196   format %{ "nego_   R0, $op2\t# overflow check long" %}
14197   ins_encode %{
14198     __ li(R0, 0);
14199     __ mtxer(R0); // clear XER.SO
14200     __ nego_(R0, $op2$$Register);
14201   %}
14202   ins_pipe(pipe_class_default);
14203 %}
14204 
14205 instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
14206   match(Set cr0 (OverflowMulL op1 op2));
14207 
14208   format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %}
14209   ins_encode %{
14210     __ li(R0, 0);
14211     __ mtxer(R0); // clear XER.SO
14212     __ mulldo_(R0, $op1$$Register, $op2$$Register);
14213   %}
14214   ins_pipe(pipe_class_default);
14215 %}
14216 
14217 instruct repl4F_reg_Ex(vecX dst, regF src) %{
14218   match(Set dst (ReplicateF src));
14219   predicate(n->as_Vector()->length() == 4);
14220   ins_cost(DEFAULT_COST);
14221   expand %{
14222     vecX tmpV;
14223     immI8  zero %{ (int)  0 %}
14224 
14225     xscvdpspn_regF(tmpV, src);
14226     xxspltw(dst, tmpV, zero);
14227   %}
14228 %}
14229 
14230 instruct repl4F_immF_Ex(vecX dst, immF src, iRegLdst tmp) %{
14231   match(Set dst (ReplicateF src));
14232   predicate(n->as_Vector()->length() == 4);
14233   effect(TEMP tmp);
14234   ins_cost(10 * DEFAULT_COST);
14235 
14236   postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase, tmp) );
14237 %}
14238 
14239 instruct repl4F_immF0(vecX dst, immF_0 zero) %{
14240   match(Set dst (ReplicateF zero));
14241   predicate(n->as_Vector()->length() == 4);
14242 
14243   format %{ "XXLXOR      $dst, $zero \t// replicate4F" %}
14244   ins_encode %{
14245     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14246   %}
14247   ins_pipe(pipe_class_default);
14248 %}
14249 
14250 instruct repl2D_reg_Ex(vecX dst, regD src) %{
14251   match(Set dst (ReplicateD src));
14252   predicate(n->as_Vector()->length() == 2);
14253 
14254   format %{ "XXPERMDI      $dst, $src, $src, 0 \t// Splat doubleword" %}
14255   size(4);
14256   ins_encode %{
14257     __ xxpermdi($dst$$VectorSRegister, $src$$FloatRegister->to_vsr(), $src$$FloatRegister->to_vsr(), 0);
14258   %}
14259   ins_pipe(pipe_class_default);
14260 %}
14261 
14262 instruct repl2D_immD0(vecX dst, immD_0 zero) %{
14263   match(Set dst (ReplicateD zero));
14264   predicate(n->as_Vector()->length() == 2);
14265 
14266   format %{ "XXLXOR      $dst, $zero \t// replicate2D" %}
14267   size(4);
14268   ins_encode %{
14269     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14270   %}
14271   ins_pipe(pipe_class_default);
14272 %}
14273 
14274 instruct mtvsrd(vecX dst, iRegLsrc src) %{
14275   predicate(false);
14276   effect(DEF dst, USE src);
14277 
14278   format %{ "MTVSRD      $dst, $src \t// Move to 16-byte register" %}
14279   size(4);
14280   ins_encode %{
14281     __ mtvsrd($dst$$VectorSRegister, $src$$Register);
14282   %}
14283   ins_pipe(pipe_class_default);
14284 %}
14285 
14286 instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
14287   effect(DEF dst, USE src, USE zero);
14288 
14289   format %{ "XXSPLTD       $dst, $src, $zero \t// Splat doubleword" %}
14290   size(4);
14291   ins_encode %{
14292     __ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
14293   %}
14294   ins_pipe(pipe_class_default);
14295 %}
14296 
14297 instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
14298   effect(DEF dst, USE src1, USE src2, USE zero);
14299 
14300   format %{ "XXPERMDI      $dst, $src1, $src2, $zero \t// Splat doubleword" %}
14301   size(4);
14302   ins_encode %{
14303     __ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
14304   %}
14305   ins_pipe(pipe_class_default);
14306 %}
14307 
14308 instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
14309   match(Set dst (ReplicateL src));
14310   predicate(n->as_Vector()->length() == 2);
14311   expand %{
14312     vecX tmpV;
14313     immI8  zero %{ (int)  0 %}
14314     mtvsrd(tmpV, src);
14315     xxpermdi(dst, tmpV, tmpV, zero);
14316   %}
14317 %}
14318 
14319 instruct repl2L_immI0(vecX dst, immI_0 zero) %{
14320   match(Set dst (ReplicateL zero));
14321   predicate(n->as_Vector()->length() == 2);
14322 
14323   format %{ "XXLXOR      $dst, $zero \t// replicate2L" %}
14324   size(4);
14325   ins_encode %{
14326     __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14327   %}
14328   ins_pipe(pipe_class_default);
14329 %}
14330 
14331 instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
14332   match(Set dst (ReplicateL src));
14333   predicate(n->as_Vector()->length() == 2);
14334 
14335   format %{ "XXLEQV      $dst, $src \t// replicate2L" %}
14336   size(4);
14337   ins_encode %{
14338     __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
14339   %}
14340   ins_pipe(pipe_class_default);
14341 %}
14342 
14343 // ============================================================================
14344 // Safepoint Instruction
14345 
14346 instruct safePoint_poll(iRegPdst poll) %{
14347   match(SafePoint poll);
14348 
14349   // Adding the effect that R0 is killed caused problems, but the effect
14350   // no longer needs to be mentioned, since R0 is not contained
14351   // in a reg_class.
14352 
14353   format %{ "LD      R0, #0, $poll \t// Safepoint poll for GC" %}
14354   size(4);
14355   ins_encode( enc_poll(0x0, poll) );
14356   ins_pipe(pipe_class_default);
14357 %}
14358 
14359 // ============================================================================
14360 // Call Instructions
14361 
14362 // Call Java Static Instruction
14363 
14364 // Schedulable version of call static node.
14365 instruct CallStaticJavaDirect(method meth) %{
14366   match(CallStaticJava);
14367   effect(USE meth);
14368   ins_cost(CALL_COST);
14369 
14370   ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
14371 
14372   format %{ "CALL,static $meth \t// ==> " %}
14373   size(4);
14374   ins_encode( enc_java_static_call(meth) );
14375   ins_pipe(pipe_class_call);
14376 %}
14377 
14378 // Call Java Dynamic Instruction
14379 
14380 // Used by postalloc expand of CallDynamicJavaDirectSchedEx (actual call).
14381 // Loading of IC was postalloc expanded. The nodes loading the IC are reachable
14382 // via fields ins_field_load_ic_hi_node and ins_field_load_ic_node.
14383 // The call destination must still be placed in the constant pool.
14384 instruct CallDynamicJavaDirectSched(method meth) %{
14385   match(CallDynamicJava); // To get all the data fields we need ...
14386   effect(USE meth);
14387   predicate(false);       // ... but never match.
14388 
14389   ins_field_load_ic_hi_node(loadConL_hiNode*);
14390   ins_field_load_ic_node(loadConLNode*);
14391   ins_num_consts(1 /* 1 patchable constant: call destination */);
14392 
14393   format %{ "BL        \t// dynamic $meth ==> " %}
14394   size(4);
14395   ins_encode( enc_java_dynamic_call_sched(meth) );
14396   ins_pipe(pipe_class_call);
14397 %}
14398 
14399 // Schedulable (i.e. postalloc expanded) version of call dynamic java.
14400 // We use postalloc expanded calls if we use inline caches
14401 // and do not update method data.
14402 //
14403 // This instruction has two constants: inline cache (IC) and call destination.
14404 // Loading the inline cache will be postalloc expanded, thus leaving a call with
14405 // one constant.
14406 instruct CallDynamicJavaDirectSched_Ex(method meth) %{
14407   match(CallDynamicJava);
14408   effect(USE meth);
14409   predicate(UseInlineCaches);
14410   ins_cost(CALL_COST);
14411 
14412   ins_num_consts(2 /* 2 patchable constants: inline cache, call destination. */);
14413 
14414   format %{ "CALL,dynamic $meth \t// postalloc expanded" %}
14415   postalloc_expand( postalloc_expand_java_dynamic_call_sched(meth, constanttablebase) );
14416 %}
14417 
14418 // Compound version of call dynamic java
14419 // We use postalloc expanded calls if we use inline caches
14420 // and do not update method data.
14421 instruct CallDynamicJavaDirect(method meth) %{
14422   match(CallDynamicJava);
14423   effect(USE meth);
14424   predicate(!UseInlineCaches);
14425   ins_cost(CALL_COST);
14426 
14427   // Enc_java_to_runtime_call needs up to 4 constants (method data oop).
14428   ins_num_consts(4);
14429 
14430   format %{ "CALL,dynamic $meth \t// ==> " %}
14431   ins_encode( enc_java_dynamic_call(meth, constanttablebase) );
14432   ins_pipe(pipe_class_call);
14433 %}
14434 
14435 // Call Runtime Instruction
14436 
14437 instruct CallRuntimeDirect(method meth) %{
14438   match(CallRuntime);
14439   effect(USE meth);
14440   ins_cost(CALL_COST);
14441 
14442   // Enc_java_to_runtime_call needs up to 3 constants: call target,
14443   // env for callee, C-toc.
14444   ins_num_consts(3);
14445 
14446   format %{ "CALL,runtime" %}
14447   ins_encode( enc_java_to_runtime_call(meth) );
14448   ins_pipe(pipe_class_call);
14449 %}
14450 
14451 // Call Leaf
14452 
14453 // Used by postalloc expand of CallLeafDirect_Ex (mtctr).
14454 instruct CallLeafDirect_mtctr(iRegLdst dst, iRegLsrc src) %{
14455   effect(DEF dst, USE src);
14456 
14457   ins_num_consts(1);
14458 
14459   format %{ "MTCTR   $src" %}
14460   size(4);
14461   ins_encode( enc_leaf_call_mtctr(src) );
14462   ins_pipe(pipe_class_default);
14463 %}
14464 
14465 // Used by postalloc expand of CallLeafDirect_Ex (actual call).
14466 instruct CallLeafDirect(method meth) %{
14467   match(CallLeaf);   // To get all the data fields we need ...
14468   effect(USE meth);
14469   predicate(false);  // but never match.
14470 
14471   format %{ "BCTRL     \t// leaf call $meth ==> " %}
14472   size(4);
14473   ins_encode %{
14474     __ bctrl();
14475   %}
14476   ins_pipe(pipe_class_call);
14477 %}
14478 
14479 // postalloc expand of CallLeafDirect.
14480 // Load address to call from TOC, then bl to it.
14481 instruct CallLeafDirect_Ex(method meth) %{
14482   match(CallLeaf);
14483   effect(USE meth);
14484   ins_cost(CALL_COST);
14485 
14486   // Postalloc_expand_java_to_runtime_call needs up to 3 constants: call target,
14487   // env for callee, C-toc.
14488   ins_num_consts(3);
14489 
14490   format %{ "CALL,runtime leaf $meth \t// postalloc expanded" %}
14491   postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
14492 %}
14493 
14494 // Call runtime without safepoint - same as CallLeaf.
14495 // postalloc expand of CallLeafNoFPDirect.
14496 // Load address to call from TOC, then bl to it.
14497 instruct CallLeafNoFPDirect_Ex(method meth) %{
14498   match(CallLeafNoFP);
14499   effect(USE meth);
14500   ins_cost(CALL_COST);
14501 
14502   // Enc_java_to_runtime_call needs up to 3 constants: call target,
14503   // env for callee, C-toc.
14504   ins_num_consts(3);
14505 
14506   format %{ "CALL,runtime leaf nofp $meth \t// postalloc expanded" %}
14507   postalloc_expand( postalloc_expand_java_to_runtime_call(meth, constanttablebase) );
14508 %}
14509 
14510 // Tail Call; Jump from runtime stub to Java code.
14511 // Also known as an 'interprocedural jump'.
14512 // Target of jump will eventually return to caller.
14513 // TailJump below removes the return address.
14514 instruct TailCalljmpInd(iRegPdstNoScratch jump_target, inline_cache_regP method_ptr) %{
14515   match(TailCall jump_target method_ptr);
14516   ins_cost(CALL_COST);
14517 
14518   format %{ "MTCTR   $jump_target \t// $method_ptr holds method\n\t"
14519             "BCTR         \t// tail call" %}
14520   size(8);
14521   ins_encode %{
14522     __ mtctr($jump_target$$Register);
14523     __ bctr();
14524   %}
14525   ins_pipe(pipe_class_call);
14526 %}
14527 
14528 // Return Instruction
14529 instruct Ret() %{
14530   match(Return);
14531   format %{ "BLR      \t// branch to link register" %}
14532   size(4);
14533   ins_encode %{
14534     // LR is restored in MachEpilogNode. Just do the RET here.
14535     __ blr();
14536   %}
14537   ins_pipe(pipe_class_default);
14538 %}
14539 
14540 // Tail Jump; remove the return address; jump to target.
14541 // TailCall above leaves the return address around.
14542 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
14543 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
14544 // "restore" before this instruction (in Epilogue), we need to materialize it
14545 // in %i0.
14546 instruct tailjmpInd(iRegPdstNoScratch jump_target, rarg1RegP ex_oop) %{
14547   match(TailJump jump_target ex_oop);
14548   ins_cost(CALL_COST);
14549 
14550   format %{ "LD      R4_ARG2 = LR\n\t"
14551             "MTCTR   $jump_target\n\t"
14552             "BCTR     \t// TailJump, exception oop: $ex_oop" %}
14553   size(12);
14554   ins_encode %{
14555     __ ld(R4_ARG2/* issuing pc */, _abi0(lr), R1_SP);
14556     __ mtctr($jump_target$$Register);
14557     __ bctr();
14558   %}
14559   ins_pipe(pipe_class_call);
14560 %}
14561 
14562 // Create exception oop: created by stack-crawling runtime code.
14563 // The created exception is now available to this handler and is set up
14564 // just prior to jumping to this handler. No code emitted.
14565 instruct CreateException(rarg1RegP ex_oop) %{
14566   match(Set ex_oop (CreateEx));
14567   ins_cost(0);
14568 
14569   format %{ " -- \t// exception oop; no code emitted" %}
14570   size(0);
14571   ins_encode( /*empty*/ );
14572   ins_pipe(pipe_class_default);
14573 %}
14574 
14575 // Rethrow exception: The exception oop will come in the first
14576 // argument position. Then JUMP (not call) to the rethrow stub code.
14577 instruct RethrowException() %{
14578   match(Rethrow);
14579   ins_cost(CALL_COST);
14580 
14581   format %{ "Jmp     rethrow_stub" %}
14582   ins_encode %{
14583     cbuf.set_insts_mark();
14584     __ b64_patchable((address)OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type);
14585   %}
14586   ins_pipe(pipe_class_call);
14587 %}
14588 
14589 // Die now.
14590 instruct ShouldNotReachHere() %{
14591   match(Halt);
14592   ins_cost(CALL_COST);
14593 
14594   format %{ "ShouldNotReachHere" %}
14595   ins_encode %{
14596     if (is_reachable()) {
14597       __ stop(_halt_reason);
14598     }
14599   %}
14600   ins_pipe(pipe_class_default);
14601 %}
14602 
14603 // This name is KNOWN by the ADLC and cannot be changed. The ADLC
14604 // forces a 'TypeRawPtr::BOTTOM' output type for this node.
14605 // Get a DEF on threadRegP with no cost and no encoding; use
14606 // 'ins_should_rematerialize(true)' to avoid spilling.
14607 instruct tlsLoadP(threadRegP dst) %{
14608   match(Set dst (ThreadLocal));
14609   ins_cost(0);
14610 
14611   ins_should_rematerialize(true);
14612 
14613   format %{ " -- \t// $dst=Thread::current(), empty" %}
14614   size(0);
14615   ins_encode( /*empty*/ );
14616   ins_pipe(pipe_class_empty);
14617 %}
14618 
14619 //---Some PPC-specific nodes----------------------------------------------------
14620 
14621 // Stop a group.
14622 instruct endGroup() %{
14623   ins_cost(0);
14624 
14625   ins_is_nop(true);
14626 
14627   format %{ "End Bundle (ori r1, r1, 0)" %}
14628   size(4);
14629   ins_encode %{
14630     __ endgroup();
14631   %}
14632   ins_pipe(pipe_class_default);
14633 %}
14634 
14635 // Nop instructions
14636 
14637 instruct fxNop() %{
14638   ins_cost(0);
14639 
14640   ins_is_nop(true);
14641 
14642   format %{ "fxNop" %}
14643   size(4);
14644   ins_encode %{
14645     __ nop();
14646   %}
14647   ins_pipe(pipe_class_default);
14648 %}
14649 
14650 instruct fpNop0() %{
14651   ins_cost(0);
14652 
14653   ins_is_nop(true);
14654 
14655   format %{ "fpNop0" %}
14656   size(4);
14657   ins_encode %{
14658     __ fpnop0();
14659   %}
14660   ins_pipe(pipe_class_default);
14661 %}
14662 
14663 instruct fpNop1() %{
14664   ins_cost(0);
14665 
14666   ins_is_nop(true);
14667 
14668   format %{ "fpNop1" %}
14669   size(4);
14670   ins_encode %{
14671     __ fpnop1();
14672   %}
14673   ins_pipe(pipe_class_default);
14674 %}
14675 
14676 instruct brNop0() %{
14677   ins_cost(0);
14678   size(4);
14679   format %{ "brNop0" %}
14680   ins_encode %{
14681     __ brnop0();
14682   %}
14683   ins_is_nop(true);
14684   ins_pipe(pipe_class_default);
14685 %}
14686 
14687 instruct brNop1() %{
14688   ins_cost(0);
14689 
14690   ins_is_nop(true);
14691 
14692   format %{ "brNop1" %}
14693   size(4);
14694   ins_encode %{
14695     __ brnop1();
14696   %}
14697   ins_pipe(pipe_class_default);
14698 %}
14699 
14700 instruct brNop2() %{
14701   ins_cost(0);
14702 
14703   ins_is_nop(true);
14704 
14705   format %{ "brNop2" %}
14706   size(4);
14707   ins_encode %{
14708     __ brnop2();
14709   %}
14710   ins_pipe(pipe_class_default);
14711 %}
14712 
14713 instruct cacheWB(indirect addr)
14714 %{
14715   match(CacheWB addr);
14716 
14717   ins_cost(100);
14718   format %{ "cache writeback, address = $addr" %}
14719   ins_encode %{
14720     assert($addr->index_position() < 0, "address must not have an index");
14721     assert($addr$$disp == 0, "address must not have a displacement");
14722     __ cache_wb(Address($addr$$base$$Register));
14723   %}
14724   ins_pipe(pipe_class_default);
14725 %}
14726 
14727 instruct cacheWBPreSync()
14728 %{
14729   match(CacheWBPreSync);
14730 
14731   ins_cost(0);
14732   format %{ "cache writeback presync" %}
14733   ins_encode %{
14734     __ cache_wbsync(true);
14735   %}
14736   ins_pipe(pipe_class_default);
14737 %}
14738 
14739 instruct cacheWBPostSync()
14740 %{
14741   match(CacheWBPostSync);
14742 
14743   ins_cost(100);
14744   format %{ "cache writeback postsync" %}
14745   ins_encode %{
14746     __ cache_wbsync(false);
14747   %}
14748   ins_pipe(pipe_class_default);
14749 %}
14750 
14751 //----------PEEPHOLE RULES-----------------------------------------------------
14752 // These must follow all instruction definitions as they use the names
14753 // defined in the instruction definitions.
14754 //
14755 // peepmatch ( root_instr_name [preceding_instruction]* );
14756 //
14757 // peepconstraint ( instruction_number.operand_name relational_op
14758 //                  instruction_number.operand_name
14759 //                  [, ...] );
14760 // // instruction numbers are zero-based using left-to-right order in peepmatch
14761 //
14762 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
14763 // // provide an instruction_number.operand_name for each operand that appears
14764 // // in the replacement instruction's match rule
14765 //
14766 // ---------VM FLAGS---------------------------------------------------------
14767 //
14768 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14769 //
14770 // Each peephole rule is given an identifying number starting with zero and
14771 // increasing by one in the order seen by the parser. An individual peephole
14772 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
14773 // on the command-line.
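//
// For illustration only, a sketch of how the flags named above might be
// used (flag availability can depend on the build type):
//
//   -XX:-OptoPeephole         disables all peephole rules
//   -XX:OptoPeepholeAt=1      applies only the peephole rule numbered 1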
14774 //
14775 // ---------CURRENT LIMITATIONS----------------------------------------------
14776 //
14777 // Only match adjacent instructions in the same basic block
14778 // Only equality constraints
14779 // Only constraints between operands, not (0.dest_reg == EAX_enc)
14780 // Only one replacement instruction
14781 //
14782 // ---------EXAMPLE----------------------------------------------------------
14783 //
14784 // // pertinent parts of existing instructions in architecture description
14785 // instruct movI(eRegI dst, eRegI src) %{
14786 //   match(Set dst (CopyI src));
14787 // %}
14788 //
14789 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
14790 //   match(Set dst (AddI dst src));
14791 //   effect(KILL cr);
14792 // %}
14793 //
14794 // // Change (inc mov) to lea
14795 // peephole %{
14796 //   // increment preceded by register-register move
14797 //   peepmatch ( incI_eReg movI );
14798 //   // require that the destination register of the increment
14799 //   // match the destination register of the move
14800 //   peepconstraint ( 0.dst == 1.dst );
14801 //   // construct a replacement instruction that sets
14802 //   // the destination to ( move's source register + one )
14803 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14804 // %}
14805 //
14806 // The implementation no longer uses movX instructions since the
14807 // machine-independent system no longer uses CopyX nodes.
14808 //
14809 // peephole %{
14810 //   peepmatch ( incI_eReg movI );
14811 //   peepconstraint ( 0.dst == 1.dst );
14812 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14813 // %}
14814 //
14815 // peephole %{
14816 //   peepmatch ( decI_eReg movI );
14817 //   peepconstraint ( 0.dst == 1.dst );
14818 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14819 // %}
14820 //
14821 // peephole %{
14822 //   peepmatch ( addI_eReg_imm movI );
14823 //   peepconstraint ( 0.dst == 1.dst );
14824 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14825 // %}
14826 //
14827 // peephole %{
14828 //   peepmatch ( addP_eReg_imm movP );
14829 //   peepconstraint ( 0.dst == 1.dst );
14830 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14831 // %}
14832 
14833 // // Change load of spilled value to only a spill
14834 // instruct storeI(memory mem, eRegI src) %{
14835 //   match(Set mem (StoreI mem src));
14836 // %}
14837 //
14838 // instruct loadI(eRegI dst, memory mem) %{
14839 //   match(Set dst (LoadI mem));
14840 // %}
14841 //
14842 peephole %{
14843   peepmatch ( loadI storeI );
14844   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14845   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14846 %}
14847 
14848 peephole %{
14849   peepmatch ( loadL storeL );
14850   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14851   peepreplace ( storeL( 1.mem 1.mem 1.src ) );
14852 %}
14853 
14854 peephole %{
14855   peepmatch ( loadP storeP );
14856   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14857   peepreplace ( storeP( 1.mem 1.mem 1.src ) );
14858 %}
14859 
14860 //----------SMARTSPILL RULES---------------------------------------------------
14861 // These must follow all instruction definitions as they use the names
14862 // defined in the instruction definitions.